{-# LANGUAGE DeriveLift, ScopedTypeVariables, LambdaCase #-}
module TreeSitter.Symbol
( TSSymbol
, fromTSSymbol
, SymbolType(..)
, Symbol(..)
, symbolToName
, escapeOperatorPunctuation
) where
import Data.Char (isAlpha, toUpper, isControl)
import Data.Function ((&))
import Data.Ix (Ix)
import Data.List.Split (condense, split, whenElt)
import Data.Word (Word16)
import Language.Haskell.TH.Syntax
type TSSymbol = Word16
fromTSSymbol :: forall symbol . Symbol symbol => TSSymbol -> symbol
fromTSSymbol symbol = toEnum (min (fromIntegral symbol) (fromEnum (maxBound :: symbol)))
data SymbolType = Regular | Anonymous | Auxiliary
deriving (Enum, Eq, Lift, Ord, Show)
class (Bounded s, Enum s, Ix s, Ord s, Show s) => Symbol s where
symbolType :: s -> SymbolType
symbolToName :: SymbolType -> String -> String
symbolToName ty name
= prefixHidden name
& toWords
& filter (not . all (== '_'))
& map escapeOperatorPunctuation
& (>>= initUpper)
& (prefix ++)
where toWords = split (condense (whenElt (not . isAlpha)))
prefixHidden s@('_':_) = "Hidden" ++ s
prefixHidden s = s
initUpper (c:cs) = toUpper c : cs
initUpper "" = ""
prefix = case ty of
Regular -> ""
Anonymous -> "Anon"
Auxiliary -> "Aux"
escapeOperatorPunctuation :: String -> String
escapeOperatorPunctuation = concatMap $ \case
'{' -> "LBrace"
'}' -> "RBrace"
'(' -> "LParen"
')' -> "RParen"
'.' -> "Dot"
':' -> "Colon"
',' -> "Comma"
'|' -> "Pipe"
';' -> "Semicolon"
'*' -> "Star"
'&' -> "Ampersand"
'=' -> "Equal"
'<' -> "LAngle"
'>' -> "RAngle"
'[' -> "LBracket"
']' -> "RBracket"
'+' -> "Plus"
'-' -> "Minus"
'/' -> "Slash"
'\\' -> "Backslash"
'^' -> "Caret"
'!' -> "Bang"
'%' -> "Percent"
'@' -> "At"
'~' -> "Tilde"
'?' -> "Question"
'`' -> "Backtick"
'#' -> "Hash"
'$' -> "Dollar"
'"' -> "DQuote"
'\'' -> "SQuote"
'\t' -> "Tab"
'\n' -> "LF"
'\r' -> "CR"
other
| isControl other -> escapeOperatorPunctuation (show other)
| otherwise -> [other]