module Text.XML.HaXml.Xtract.Lex
( lexXtract
, Posn(..)
, TokenT(..)
, Token
) where
import Data.Char
type Token = Either String (Posn, TokenT)
data Posn = Pn Int
deriving Posn -> Posn -> Bool
(Posn -> Posn -> Bool) -> (Posn -> Posn -> Bool) -> Eq Posn
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Posn -> Posn -> Bool
$c/= :: Posn -> Posn -> Bool
== :: Posn -> Posn -> Bool
$c== :: Posn -> Posn -> Bool
Eq
instance Show Posn where
showsPrec :: Int -> Posn -> ShowS
showsPrec Int
_p (Pn Int
c) = String -> ShowS
showString String
"char pos " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ShowS
forall a. Show a => a -> ShowS
shows Int
c
data TokenT =
Symbol String
| TokString String
| TokNum Integer
deriving TokenT -> TokenT -> Bool
(TokenT -> TokenT -> Bool)
-> (TokenT -> TokenT -> Bool) -> Eq TokenT
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TokenT -> TokenT -> Bool
$c/= :: TokenT -> TokenT -> Bool
== :: TokenT -> TokenT -> Bool
$c== :: TokenT -> TokenT -> Bool
Eq
instance Show TokenT where
showsPrec :: Int -> TokenT -> ShowS
showsPrec Int
_p (Symbol String
s) = String -> ShowS
showString String
s
showsPrec Int
_p (TokString String
s) = String -> ShowS
showString String
s
showsPrec Int
_p (TokNum Integer
n) = Integer -> ShowS
forall a. Show a => a -> ShowS
shows Integer
n
emit :: TokenT -> Posn -> Token
emit :: TokenT -> Posn -> Token
emit TokenT
tok Posn
p = Posn -> Int
forcep Posn
p Int -> Token -> Token
`seq` (Posn, TokenT) -> Token
forall a b. b -> Either a b
Right (Posn
p,TokenT
tok)
where forcep :: Posn -> Int
forcep (Pn Int
n) = Int
n
lexerror :: String -> Posn -> [Token]
lexerror :: String -> Posn -> [Token]
lexerror String
s Posn
p = [String -> Token
forall a b. a -> Either a b
Left (String
"Lexical error in selection pattern at "String -> ShowS
forall a. [a] -> [a] -> [a]
++Posn -> String
forall a. Show a => a -> String
show Posn
pString -> ShowS
forall a. [a] -> [a] -> [a]
++String
": "
String -> ShowS
forall a. [a] -> [a] -> [a]
++String
sString -> ShowS
forall a. [a] -> [a] -> [a]
++String
"\n")]
addcol :: Int -> Posn -> Posn
addcol :: Int -> Posn -> Posn
addcol Int
n (Pn Int
c) = Int -> Posn
Pn (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
n)
newline, tab :: Posn -> Posn
newline :: Posn -> Posn
newline (Pn Int
c) = Int -> Posn
Pn (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
tab :: Posn -> Posn
tab (Pn Int
c) = Int -> Posn
Pn (((Int
cInt -> Int -> Int
forall a. Integral a => a -> a -> a
`div`Int
8)Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
8)
white :: Char -> Posn -> Posn
white :: Char -> Posn -> Posn
white Char
'\t' = Posn -> Posn
tab
white Char
' ' = Int -> Posn -> Posn
addcol Int
1
white Char
'\n' = Int -> Posn -> Posn
addcol Int
1
white Char
'\r' = Int -> Posn -> Posn
addcol Int
1
white Char
'\xa0' = Int -> Posn -> Posn
addcol Int
1
blank :: (Posn->String->[Token]) -> Posn-> String-> [Token]
blank :: (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank Posn -> String -> [Token]
_ Posn
_ [] = []
blank Posn -> String -> [Token]
k Posn
p (Char
' ': String
s) = (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank Posn -> String -> [Token]
k (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
s
blank Posn -> String -> [Token]
k Posn
p (Char
'\t':String
s) = (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank Posn -> String -> [Token]
k (Posn -> Posn
tab Posn
p) String
s
blank Posn -> String -> [Token]
k Posn
p (Char
'\n':String
s) = (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank Posn -> String -> [Token]
k (Posn -> Posn
newline Posn
p) String
s
blank Posn -> String -> [Token]
k Posn
p (Char
'\r':String
s) = (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank Posn -> String -> [Token]
k Posn
p String
s
blank Posn -> String -> [Token]
k Posn
p (Char
'\xa0': String
s) = (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank Posn -> String -> [Token]
k (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
s
blank Posn -> String -> [Token]
k Posn
p String
s = Posn -> String -> [Token]
k Posn
p String
s
lexXtract :: (String->String) -> String -> [Token]
lexXtract :: ShowS -> String -> [Token]
lexXtract ShowS
f = ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn
Pn Int
1)
syms :: [Char]
syms :: String
syms = String
"/[]()@,=*&|~$+-<>"
selAny :: (String->String) -> Posn -> String -> [Token]
selAny :: ShowS -> Posn -> String -> [Token]
selAny ShowS
_ Posn
_ [] = []
selAny ShowS
f Posn
p (Char
'/':Char
'/':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
"//") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
2 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'!':Char
'=':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
"!=") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
2 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'<':Char
'=':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
"<=") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
2 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'>':Char
'=':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
">=") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
2 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'\'':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
"'") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:
Char
-> TokenT
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
accumulateUntil Char
'\'' (String -> TokenT
Symbol String
"'") [] Posn
p (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss
(ShowS -> Posn -> String -> [Token]
selAny ShowS
f)
selAny ShowS
f Posn
p (Char
'"':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
"\"") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:
Char
-> TokenT
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
accumulateUntil Char
'"' (String -> TokenT
Symbol String
"\"") [] Posn
p (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss
(ShowS -> Posn -> String -> [Token]
selAny ShowS
f)
selAny ShowS
f Posn
p (Char
'_':String
ss) = ShowS
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
gatherName ShowS
f String
"_" Posn
p (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss ((Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank (ShowS -> Posn -> String -> [Token]
selAny ShowS
f))
selAny ShowS
f Posn
p (Char
':':String
ss) = ShowS
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
gatherName ShowS
f String
":" Posn
p (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss ((Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank (ShowS -> Posn -> String -> [Token]
selAny ShowS
f))
selAny ShowS
f Posn
p (Char
'.':Char
'=':Char
'.':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
".=.") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
3 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'.':Char
'!':Char
'=':Char
'.':String
ss)
= TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
".!=.") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
4 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'.':Char
'<':Char
'.':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
".<.") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
3 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'.':Char
'<':Char
'=':Char
'.':String
ss)
= TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
".<=.") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
4 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'.':Char
'>':Char
'.':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
".>.") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
3 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'.':Char
'>':Char
'=':Char
'.':String
ss)
= TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
".>=.") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
4 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
'.':Char
'/':String
ss) = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol String
"./") Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
2 Posn
p) String
ss
selAny ShowS
f Posn
p (Char
s:String
ss)
| Char
s Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
syms = TokenT -> Posn -> Token
emit (String -> TokenT
Symbol [Char
s]) Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: ShowS -> Posn -> String -> [Token]
selAny ShowS
f (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss
| Char -> Bool
isSpace Char
s = (Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank (ShowS -> Posn -> String -> [Token]
selAny ShowS
f) Posn
p (Char
sChar -> ShowS
forall a. a -> [a] -> [a]
:String
ss)
| Char -> Bool
isAlpha Char
s = ShowS
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
gatherName ShowS
f [Char
s] Posn
p (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss ((Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank (ShowS -> Posn -> String -> [Token]
selAny ShowS
f))
| Char -> Bool
isDigit Char
s = String
-> Posn -> Posn -> String -> (Posn -> String -> [Token]) -> [Token]
gatherNum [Char
s] Posn
p (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss ((Posn -> String -> [Token]) -> Posn -> String -> [Token]
blank (ShowS -> Posn -> String -> [Token]
selAny ShowS
f))
| Bool
otherwise = String -> Posn -> [Token]
lexerror String
"unrecognised pattern" Posn
p
gatherName :: (String->String) -> String -> Posn -> Posn -> String
-> (Posn->String->[Token]) -> [Token]
gatherName :: ShowS
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
gatherName ShowS
f String
acc Posn
pos Posn
p (Char
s:String
ss) Posn -> String -> [Token]
k
| Char -> Bool
isAlphaNum Char
s Bool -> Bool -> Bool
|| Char
s Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"-_:" = ShowS
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
gatherName ShowS
f (Char
sChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) Posn
pos (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss Posn -> String -> [Token]
k
gatherName ShowS
f String
acc Posn
pos Posn
p String
ss Posn -> String -> [Token]
k =
TokenT -> Posn -> Token
emit (String -> TokenT
TokString (ShowS
f (ShowS
forall a. [a] -> [a]
reverse String
acc))) Posn
posToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Posn -> String -> [Token]
k Posn
p String
ss
gatherNum :: String -> Posn -> Posn -> String
-> (Posn->String->[Token]) -> [Token]
gatherNum :: String
-> Posn -> Posn -> String -> (Posn -> String -> [Token]) -> [Token]
gatherNum String
acc Posn
pos Posn
p (Char
s:String
ss) Posn -> String -> [Token]
k
| Char -> Bool
isHexDigit Char
s = String
-> Posn -> Posn -> String -> (Posn -> String -> [Token]) -> [Token]
gatherNum (Char
sChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) Posn
pos (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss Posn -> String -> [Token]
k
gatherNum String
acc Posn
pos Posn
p String
ss Posn -> String -> [Token]
k =
TokenT -> Posn -> Token
emit (Integer -> TokenT
TokNum (String -> Integer
forall a. Read a => String -> a
read (ShowS
forall a. [a] -> [a]
reverse String
acc))) Posn
posToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Posn -> String -> [Token]
k Posn
p String
ss
accumulateUntil :: Char -> TokenT -> String -> Posn -> Posn -> String
-> (Posn->String->[Token]) -> [Token]
accumulateUntil :: Char
-> TokenT
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
accumulateUntil Char
c TokenT
_tok String
_acc Posn
pos Posn
p [] Posn -> String -> [Token]
_k =
String -> Posn -> [Token]
lexerror (String
"found end of pattern while looking for "String -> ShowS
forall a. [a] -> [a] -> [a]
++Char
c
Char -> ShowS
forall a. a -> [a] -> [a]
:String
" to match opening quote at "String -> ShowS
forall a. [a] -> [a] -> [a]
++Posn -> String
forall a. Show a => a -> String
show Posn
pos) Posn
p
accumulateUntil Char
c TokenT
tok String
acc Posn
pos Posn
p (Char
s:String
ss) Posn -> String -> [Token]
k
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
s = TokenT -> Posn -> Token
emit (String -> TokenT
TokString (ShowS
forall a. [a] -> [a]
reverse String
acc)) Posn
posToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:
TokenT -> Posn -> Token
emit TokenT
tok Posn
pToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Posn -> String -> [Token]
k (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss
| Char -> Bool
isSpace Char
s = Char
-> TokenT
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
accumulateUntil Char
c TokenT
tok (Char
sChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) Posn
pos (Char -> Posn -> Posn
white Char
s Posn
p) String
ss Posn -> String -> [Token]
k
| Bool
otherwise = Char
-> TokenT
-> String
-> Posn
-> Posn
-> String
-> (Posn -> String -> [Token])
-> [Token]
accumulateUntil Char
c TokenT
tok (Char
sChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) Posn
pos (Int -> Posn -> Posn
addcol Int
1 Posn
p) String
ss Posn -> String -> [Token]
k