module Database.Sql.Hive.Parser.Token where
import Database.Sql.Hive.Token
import Database.Sql.Hive.Parser.Internal
import Database.Sql.Position
import qualified Text.Parsec as P
import qualified Text.Parsec.Pos as P
import Data.Char (isDigit)
import Data.String
import Data.Text.Lazy hiding (foldl1, map, head, last, all, null, init)
import qualified Data.Text.Lazy.Encoding as TL
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as BL
import Data.Semigroup ((<>))
showTok :: (Token, Position, Position) -> String
showTok (t, _, _) = show t
posFromTok :: P.SourcePos ->
(Token, Position, Position) ->
[(Token, Position, Position)] ->
P.SourcePos
posFromTok _ (_, pos, _) _ = flip P.setSourceLine (fromEnum $ positionLine pos)
$ flip P.setSourceColumn (fromEnum $ positionColumn pos)
$ P.initialPos "-"
tokEqualsP :: Token -> Parser Range
tokEqualsP tok = P.tokenPrim showTok posFromTok testTok
where
testTok (tok', s, e) =
if tok == tok'
then Just $ Range s e
else Nothing
tokNotEqualsP :: Token -> Parser Range
tokNotEqualsP tok = P.tokenPrim showTok posFromTok testTok
where
testTok (tok', s, e) =
if tok /= tok'
then Just $ Range s e
else Nothing
testNameTok :: (Token, Position, Position) -> Maybe (Text, Range)
testNameTok (tok, s, e) =
case tok of
TokWord _ name -> Just (name, Range s e)
_ -> Nothing
variableSubstitutionP :: Parser Range
variableSubstitutionP = P.tokenPrim showTok posFromTok testVariableTok
where
testVariableTok (tok, s, e) =
case tok of
TokVariable _ _ -> Just (Range s e)
_ -> Nothing
typeNameP :: Parser (Text, Range)
typeNameP = P.tokenPrim showTok posFromTok testNameTok
nodeNameP :: Parser (Text, Range)
nodeNameP = P.tokenPrim showTok posFromTok testNameTok
structFieldNameP :: Parser (Text, Range)
structFieldNameP = P.tokenPrim showTok posFromTok testNameTok
windowNameP :: Parser (Text, Range)
windowNameP = P.tokenPrim showTok posFromTok testNameTok
datePartP :: Parser (Text, Range)
datePartP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord _ name | toLower name `elem` parts -> Just (toLower name, Range s e)
_ -> Nothing
parts = [ "year", "yy", "yyyy"
, "quarter", "qq", "q"
, "month", "mm", "m"
, "day", "dd", "d", "dy", "dayofyear", "y"
, "week", "wk", "ww"
, "hour", "hh"
, "minute", "mi", "n"
, "second", "ss", "s"
, "millisecond", "ms"
, "microsecond", "mcs", "us"
]
schemaNameP :: Parser (Text, Range)
schemaNameP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord True name -> Just (name, Range s e)
TokWord False name
| wordCanBeSchemaName (wordInfo name) -> Just (name, Range s e)
_ -> Nothing
tableNameP :: Parser (Text, Range)
tableNameP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord True name -> Just (name, Range s e)
TokWord False name
| wordCanBeTableName (wordInfo name) -> Just (name, Range s e)
_ -> Nothing
projectionNameP :: Parser (Text, Range)
projectionNameP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord True name -> Just (name, Range s e)
TokWord False name
| wordCanBeTableName (wordInfo name) -> Just (name, Range s e)
_ -> Nothing
columnNameP :: Parser (Text, Range)
columnNameP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord True name -> Just (name, Range s e)
TokWord False name
| wordCanBeColumnName (wordInfo name) -> Just (name, Range s e)
_ -> Nothing
functionNameP :: Parser (Text, Range)
functionNameP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord True name -> Just (name, Range s e)
TokWord False name
| wordCanBeFunctionName (wordInfo name) ->
Just (name, Range s e)
_ -> Nothing
propertyValuePartP :: Parser (Text, Range)
propertyValuePartP = textUntilP [";"]
propertyNameP :: Parser (Text, Range)
propertyNameP = textUntilP ["=", ";"]
textUntilP :: [Text] -> Parser (Text, Range)
textUntilP x = do
res <- P.many anyTokenExceptX
let name = Data.Text.Lazy.concat $ fst <$> res
s = snd $ head res
e = snd $ last res
pure (name, s <> e)
where
anyTokenExceptX :: Parser (Text, Range)
anyTokenExceptX = P.tokenPrim showTok posFromTok $
\ (tok, s, e) -> case tok of
TokSymbol t | not (t `elem` x) ->
Just (t, Range s e)
TokWord _ t | not (t `elem` x) ->
Just (t, Range s e)
TokNumber t | not (t `elem` x) ->
Just (t, Range s e)
_ -> Nothing
keywordP :: Text -> Parser Range
keywordP keyword = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokWord False name
| name == keyword -> Just (Range s e)
_ -> Nothing
fieldTypeP :: Parser (Text, Range)
fieldTypeP = P.tokenPrim showTok posFromTok testTok
where
isField :: (Eq s, IsString s) => s -> Bool
isField = flip elem
[ "century", "day", "decade", "doq", "dow", "doy", "epoch"
, "hour", "isodow", "isoweek", "isoyear", "microseconds"
, "millennium", "milliseconds", "minute", "month", "quarter"
, "second", "time zone", "timezone_hour", "timezone_minute"
, "week", "year"
]
testTok (tok, s, e) = case tok of
TokWord _ field | isField field -> Just (field, Range s e)
TokString field | isField field -> Just (TL.decodeUtf8 field, Range s e)
_ -> Nothing
periodP :: Parser (Text, Range)
periodP = P.tokenPrim showTok posFromTok testTok
where
isPeriod :: (Eq s, IsString s) => s -> Bool
isPeriod = flip elem [ "day", "hour", "minute", "month", "second", "year" ]
testTok (tok, s, e) = case tok of
TokWord _ period | isPeriod period -> Just (period, Range s e)
TokString period | isPeriod period -> Just (TL.decodeUtf8 period, Range s e)
_ -> Nothing
byteAmountP :: Parser (Text, Range)
byteAmountP = P.tokenPrim showTok posFromTok testTok
where
isByteAmount :: Text -> Bool
isByteAmount text =
let str = unpack $ toLower text
in and [ not $ null str
, last str `elem` ['b', 'k', 'm', 'g']
, all isDigit $ init str
]
testTok (tok, s, e) = case tok of
TokWord False byteAmount | isByteAmount byteAmount -> Just (byteAmount, Range s e)
_ -> Nothing
stringP :: Parser (ByteString, Range)
stringP = do
tokens <- P.many1 singleStringTokenP
let s = foldl1 BL.append $ map fst tokens
r = foldl1 (<>) $ map snd tokens
pure $ (s, r)
where
singleStringTokenP :: Parser (ByteString, Range)
singleStringTokenP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokString string -> Just (string, Range s e)
_ -> Nothing
numberP :: Parser (Text, Range)
numberP = P.tokenPrim showTok posFromTok testTok
where
testTok (tok, s, e) = case tok of
TokNumber number -> Just (number, Range s e)
_ -> Nothing
dotP :: Parser Range
dotP = symbolP "."
equalP :: Parser Range
equalP = symbolP "="
colonP :: Parser Range
colonP = symbolP ":"
symbolP :: Text -> Parser Range
symbolP op = tokEqualsP $ TokSymbol op
starP :: Parser Range
starP = symbolP "*"
openP :: Parser Range
openP = symbolP "("
closeP :: Parser Range
closeP = symbolP ")"
openBracketP :: Parser Range
openBracketP = symbolP "["
closeBracketP :: Parser Range
closeBracketP = symbolP "]"
openAngleP :: Parser Range
openAngleP = symbolP "<"
closeAngleP :: Parser Range
closeAngleP = symbolP ">"
castP :: Parser Range
castP = keywordP "cast"
castOpP :: Parser Range
castOpP = symbolP "::"
minusP :: Parser Range
minusP = symbolP "-"
accessRankP :: Parser Range
accessRankP = keywordP "accessrank"
addP :: Parser Range
addP = keywordP "add"
afterP :: Parser Range
afterP = keywordP "after"
allP :: Parser Range
allP = keywordP "all"
alterP :: Parser Range
alterP = keywordP "alter"
analyzeP :: Parser Range
analyzeP = keywordP "analyze"
andP :: Parser Range
andP = keywordP "and"
arrayP :: Parser Range
arrayP = keywordP "array"
asP :: Parser Range
asP = keywordP "as"
ascP :: Parser Range
ascP = keywordP "asc"
atP :: Parser Range
atP = keywordP "at"
autoP :: Parser Range
autoP = keywordP "auto"
avroP :: Parser Range
avroP = keywordP "avro"
bestP :: Parser Range
bestP = keywordP "best"
betweenP :: Parser Range
betweenP = keywordP "between"
bucketP :: Parser Range
bucketP = keywordP "bucket"
bucketsP :: Parser Range
bucketsP = keywordP "buckets"
byP :: Parser Range
byP = keywordP "by"
cacheP :: Parser Range
cacheP = keywordP "cache"
cascadeP :: Parser Range
cascadeP = keywordP "cascade"
caseP :: Parser Range
caseP = keywordP "case"
changeP :: Parser Range
changeP = keywordP "change"
clusterP :: Parser Range
clusterP = keywordP "cluster"
clusteredP :: Parser Range
clusteredP = keywordP "clustered"
collectionP :: Parser Range
collectionP = keywordP "collection"
columnP :: Parser Range
columnP = keywordP "column"
columnsP :: Parser Range
columnsP = keywordP "columns"
commaP :: Parser Range
commaP = symbolP ","
commentP :: Parser Range
commentP = keywordP "comment"
commitP :: Parser Range
commitP = keywordP "commit"
computeP :: Parser Range
computeP = keywordP "compute"
createP :: Parser Range
createP = keywordP "create"
crossP :: Parser Range
crossP = keywordP "cross"
cubeP :: Parser Range
cubeP = keywordP "cube"
currentP :: Parser Range
currentP = keywordP "current"
currentDatabaseP :: Parser (Text, Range)
currentDatabaseP = ("current_database",) <$> keywordP "current_database"
currentDateP :: Parser (Text, Range)
currentDateP = ("current_date",) <$> keywordP "current_date"
currentSchemaP :: Parser (Text, Range)
currentSchemaP = ("current_schema",) <$> keywordP "current_schema"
currentTimeP :: Parser (Text, Range)
currentTimeP = ("current_time",) <$> keywordP "current_time"
currentTimestampP :: Parser (Text, Range)
currentTimestampP = ("current_timestamp",) <$> keywordP "current_timestamp"
currentUserP :: Parser (Text, Range)
currentUserP = ("current_user",) <$> keywordP "current_user"
dataP :: Parser Range
dataP = keywordP "data"
databaseP :: Parser Range
databaseP = keywordP "database"
dateDiffP :: Parser Range
dateDiffP = keywordP "datediff"
dbPropertiesP :: Parser Range
dbPropertiesP = keywordP "dbproperties"
defaultP :: Parser Range
defaultP = keywordP "default"
definedP :: Parser Range
definedP = keywordP "defined"
deleteP :: Parser Range
deleteP = keywordP "delete"
delimitedP :: Parser Range
delimitedP = keywordP "delimited"
descP :: Parser Range
descP = keywordP "desc"
describeP :: Parser Range
describeP = keywordP "describe" P.<|> keywordP "desc"
directoryP :: Parser Range
directoryP = keywordP "directory"
distinctP :: Parser Range
distinctP = keywordP "distinct"
distributeP :: Parser Range
distributeP = keywordP "distribute"
dropP :: Parser Range
dropP = keywordP "drop"
elseP :: Parser Range
elseP = keywordP "else"
encodingP :: Parser Range
encodingP = keywordP "encoding"
endP :: Parser Range
endP = keywordP "end"
escapeP :: Parser Range
escapeP = keywordP "escape"
escapedP :: Parser Range
escapedP = keywordP "escaped"
excludingP :: Parser Range
excludingP = keywordP "excluding"
existsP :: Parser Range
existsP = keywordP "exists"
explainP :: Parser Range
explainP = keywordP "explain"
externalP :: Parser Range
externalP = keywordP "external"
extractP :: Parser Range
extractP = keywordP "extract"
falseP :: Parser Range
falseP = keywordP "false"
fieldsP :: Parser Range
fieldsP = keywordP "fields"
firstP :: Parser Range
firstP = keywordP "first"
followingP :: Parser Range
followingP = keywordP "following"
forP :: Parser Range
forP = keywordP "for"
formatP :: Parser Range
formatP = keywordP "format"
fromP :: Parser Range
fromP = keywordP "from"
functionP :: Parser Range
functionP = keywordP "function"
fullP :: Parser Range
fullP = keywordP "full"
globalP :: Parser Range
globalP = keywordP "global"
grantP :: Parser Range
grantP = keywordP "grant"
groupP :: Parser Range
groupP = keywordP "group"
groupingP :: Parser Range
groupingP = keywordP "grouping"
havingP :: Parser Range
havingP = keywordP "having"
ifP :: Parser Range
ifP = keywordP "if"
ignoreP :: Parser Range
ignoreP = keywordP "ignore"
inP :: Parser Range
inP = keywordP "in"
includingP :: Parser Range
includingP = keywordP "including"
inPathP :: Parser Range
inPathP = keywordP "inpath"
innerP :: Parser Range
innerP = keywordP "inner"
inputFormatP :: Parser Range
inputFormatP = keywordP "inputformat"
insertP :: Parser Range
insertP = keywordP "insert"
intervalP :: Parser Range
intervalP = keywordP "interval"
intoP :: Parser Range
intoP = keywordP "into"
isP :: Parser Range
isP = keywordP "is"
itemsP :: Parser Range
itemsP = keywordP "items"
joinP :: Parser Range
joinP = keywordP "join"
keysP :: Parser Range
keysP = keywordP "keys"
ksafeP :: Parser Range
ksafeP = keywordP "ksafe"
lastP :: Parser Range
lastP = keywordP "last"
lateralP :: Parser Range
lateralP = keywordP "lateral"
leftP :: Parser Range
leftP = keywordP "left"
likeP :: Parser Range
likeP = keywordP "like"
limitP :: Parser Range
limitP = keywordP "limit"
linesP :: Parser Range
linesP = keywordP "lines"
loadP :: Parser Range
loadP = keywordP "load"
localP :: Parser Range
localP = keywordP "local"
localTimeP :: Parser (Text, Range)
localTimeP = ("localtime",) <$> keywordP "localtime"
localTimestampP :: Parser (Text, Range)
localTimestampP = ("localtimestamp",) <$> keywordP "localtimestamp"
locationP :: Parser Range
locationP = keywordP "location"
mapP :: Parser Range
mapP = keywordP "map"
metadataP :: Parser Range
metadataP = keywordP "metadata"
noP :: Parser Range
noP = keywordP "no"
nodeP :: Parser Range
nodeP = keywordP "node"
nodesP :: Parser Range
nodesP = keywordP "nodes"
noScanP :: Parser Range
noScanP = keywordP "noscan"
notP :: Parser Range
notP = keywordP "not"
notOperatorP :: Parser Range
notOperatorP = keywordP "not" P.<|> symbolP "!"
nullP :: Parser Range
nullP = keywordP "null"
nullsP :: Parser Range
nullsP = keywordP "nulls"
nullsequalP :: Parser Range
nullsequalP = keywordP "nullsequal"
ofP :: Parser Range
ofP = keywordP "of"
offsetP :: Parser Range
offsetP = keywordP "offset"
onP :: Parser Range
onP = keywordP "on"
orP :: Parser Range
orP = keywordP "or"
orcP :: Parser Range
orcP = keywordP "orc"
orderP :: Parser Range
orderP = keywordP "order"
overlapsP :: Parser Range
overlapsP = keywordP "overlaps"
overwriteP :: Parser Range
overwriteP = keywordP "overwrite"
outP :: Parser Range
outP = keywordP "out"
outerP :: Parser Range
outerP = keywordP "outer"
outputFormatP :: Parser Range
outputFormatP = keywordP "outputformat"
overP :: Parser Range
overP = keywordP "over"
parametersP :: Parser Range
parametersP = keywordP "parameters"
parquetP :: Parser Range
parquetP = keywordP "parquet"
partitionP :: Parser Range
partitionP = keywordP "partition"
partitionedP :: Parser Range
partitionedP = keywordP "partitioned"
percentP :: Parser Range
percentP = keywordP "percent"
precedingP :: Parser Range
precedingP = keywordP "preceding"
preserveP :: Parser Range
preserveP = keywordP "preserve"
projectionP :: Parser Range
projectionP = keywordP "projection"
projectionsP :: Parser Range
projectionsP = keywordP "projections"
protectionP :: Parser Range
protectionP = keywordP "protection"
purgeP :: Parser Range
purgeP = keywordP "purge"
randP :: Parser Range
randP = keywordP "rand"
rangeP :: Parser Range
rangeP = keywordP "range"
rcFileP :: Parser Range
rcFileP = keywordP "rcfile"
regexpP :: Parser Range
regexpP = keywordP "regexp"
reloadP :: Parser Range
reloadP = keywordP "reload"
renameP :: Parser Range
renameP = keywordP "rename"
restrictP :: Parser Range
restrictP = keywordP "restrict"
revokeP :: Parser Range
revokeP = keywordP "revoke"
rlikeP :: Parser Range
rlikeP = keywordP "rlike"
rightP :: Parser Range
rightP = keywordP "right"
rollbackP :: Parser Range
rollbackP = keywordP "rollback"
rollupP :: Parser Range
rollupP = keywordP "rollup"
rowP :: Parser Range
rowP = keywordP "row"
rowsP :: Parser Range
rowsP = keywordP "rows"
schemaP :: Parser Range
schemaP = keywordP "schema"
segmentedP :: Parser Range
segmentedP = keywordP "segmented"
selectP :: Parser Range
selectP = keywordP "select"
semicolonP :: Parser Range
semicolonP = symbolP ";"
notSemicolonP :: Parser Range
notSemicolonP = tokNotEqualsP $ TokSymbol ";"
semiP :: Parser Range
semiP = keywordP "semi"
sessionUserP :: Parser (Text, Range)
sessionUserP = ("session_user",) <$> keywordP "session_user"
sequenceFileP :: Parser Range
sequenceFileP = keywordP "sequencefile"
serdeP :: Parser Range
serdeP = keywordP "serde"
serdePropertiesP :: Parser Range
serdePropertiesP = keywordP "serdeproperties"
setP :: Parser Range
setP = keywordP "set"
setsP :: Parser Range
setsP = keywordP "sets"
showP :: Parser Range
showP = keywordP "show"
sortP :: Parser Range
sortP = keywordP "sort"
sortedP :: Parser Range
sortedP = keywordP "sorted"
statisticsP :: Parser Range
statisticsP = keywordP "statistics"
storedP :: Parser Range
storedP = keywordP "stored"
structP :: Parser Range
structP = keywordP "struct"
sysDateP :: Parser (Text, Range)
sysDateP = ("sysdate",) <$> keywordP "sysdate"
tableP :: Parser Range
tableP = keywordP "table"
tableSampleP :: Parser Range
tableSampleP = keywordP "tablesample"
tblPropertiesP :: Parser Range
tblPropertiesP = keywordP "tblproperties"
temporaryP :: Parser Range
temporaryP = keywordP "temporary" P.<|> keywordP "temp"
terminatedP :: Parser Range
terminatedP = keywordP "terminated"
textFileP :: Parser Range
textFileP = keywordP "textfile"
thenP :: Parser Range
thenP = keywordP "then"
timeseriesP :: Parser Range
timeseriesP = keywordP "timeseries"
timestampP :: Parser Range
timestampP = keywordP "timestamp"
toP :: Parser Range
toP = keywordP "to"
trueP :: Parser Range
trueP = keywordP "true"
truncateP :: Parser Range
truncateP = keywordP "truncate"
unboundedP :: Parser Range
unboundedP = keywordP "unbounded"
unionP :: Parser Range
unionP = keywordP "union"
uniontypeP :: Parser Range
uniontypeP = keywordP "uniontype"
unknownP :: Parser Range
unknownP = keywordP "unknown"
unsegmentedP :: Parser Range
unsegmentedP = keywordP "unsegmented"
useP :: Parser Range
useP = keywordP "use"
userP :: Parser (Text, Range)
userP = ("user",) <$> keywordP "user"
valuesP :: Parser Range
valuesP = keywordP "values"
viewP :: Parser Range
viewP = keywordP "view"
whenP :: Parser Range
whenP = keywordP "when"
whereP :: Parser Range
whereP = keywordP "where"
windowP :: Parser Range
windowP = keywordP "window"
withP :: Parser Range
withP = keywordP "with"
inequalityOpP :: Parser (Text, Range)
inequalityOpP = P.tokenPrim showTok posFromTok testTok
where
testTok (TokSymbol op, s, e)
| op `elem` ["<", ">", "<=", ">="] = Just (op, Range s e)
testTok _ = Nothing
equalityOpP :: Parser (Text, Range)
equalityOpP = P.tokenPrim showTok posFromTok testTok
where
testTok (TokSymbol op, s, e)
| op `elem` ["=", "==", "<=>", "<>", "!="] = Just (op, Range s e)
testTok _ = Nothing