Copyright | Copyright (C) 2014 - Uwe Schmidt |
---|---|
License | MIT |
Maintainer | Uwe Schmidt <uwe@fh-wedel.de> |
Stability | stable |
Portability | portable |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
W3C XML Schema Regular Expression Matcher
Grammar can be found under http://www.w3.org/TR/xmlschema11-2/#regexs
Synopsis
- data GenRegex s
- mkZero :: s -> GenRegex s
- mkZero' :: StringLike s => String -> GenRegex s
- mkUnit :: GenRegex s
- mkSym :: StringLike s => CharSet -> GenRegex s
- mkSym1 :: StringLike s => Char -> GenRegex s
- mkSymRng :: StringLike s => Char -> Char -> GenRegex s
- mkWord :: StringLike s => [Char] -> GenRegex s
- mkDot :: GenRegex s
- mkStar :: StringLike s => GenRegex s -> GenRegex s
- mkAll :: StringLike s => GenRegex s
- mkAlt :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkSeq :: GenRegex s -> GenRegex s -> GenRegex s
- mkSeqs :: [GenRegex s] -> GenRegex s
- mkRep :: StringLike s => Int -> GenRegex s -> GenRegex s
- mkRng :: StringLike s => Int -> Int -> GenRegex s -> GenRegex s
- mkOpt :: StringLike s => GenRegex s -> GenRegex s
- mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s
- mkInterleave :: GenRegex s -> GenRegex s -> GenRegex s
- mkCompl :: StringLike s => GenRegex s -> GenRegex s
- mkBr :: s -> GenRegex s -> GenRegex s
- mkBr' :: StringLike s => String -> GenRegex s -> GenRegex s
- isZero :: GenRegex s -> Bool
- errRegex :: StringLike s => GenRegex s -> s
- nullable :: StringLike s => GenRegex s -> Bool
- nullable' :: StringLike s => GenRegex s -> Nullable s
- delta1 :: StringLike s => Char -> s -> GenRegex s -> GenRegex s
- delta :: StringLike s => s -> GenRegex s -> GenRegex s
- firstChars :: StringLike s => GenRegex s -> CharSet
- matchWithRegex :: StringLike s => GenRegex s -> s -> Bool
- matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s)
- splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s)
- splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s)
- splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s)
- splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s)
Documentation
Instances
Eq s => Eq (GenRegex s) Source # | |
Ord s => Ord (GenRegex s) Source # | |
Defined in Text.Regex.XMLSchema.Generic.Regex | |
StringLike s => Show (GenRegex s) Source # | |
mkZero :: s -> GenRegex s Source #
construct the r.e. for the empty set. An (error-) message may be attached
mkSymRng :: StringLike s => Char -> Char -> GenRegex s Source #
construct an r.e. for an interval of chars
mkAll :: StringLike s => GenRegex s Source #
construct an r.e. for the set of all Unicode words
mkElse :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source #
construct the r.e. for r1{|}r2 (r1 orElse r2).
This represents the same r.e. as r1|r2, but when collecting the results of subexpressions in (...) and r1 succeeds, the subexpressions of r2 are discarded, so r1 matches are prioritized
example
splitSubex "({1}x)|({2}.)" "x" = ([("1","x"),("2","x")], "")
splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "")
mkDiff :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source #
Construct difference r.e.: r1 {\} r2
example
match "[a-z]+{\\}bush" "obama" = True
match "[a-z]+{\\}bush" "clinton" = True
match "[a-z]+{\\}bush" "bush" = False -- not important any more
mkIsect :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source #
Construct r.e. for intersection: r1 {&} r2
example
match ".*a.*{&}.*b.*" "-a-b-" = True
match ".*a.*{&}.*b.*" "-b-a-" = True
match ".*a.*{&}.*b.*" "-a-a-" = False
match ".*a.*{&}.*b.*" "---b-" = False
mkExor :: StringLike s => GenRegex s -> GenRegex s -> GenRegex s Source #
Construct r.e. for exclusive or: r1 {^} r2
example
match "[a-c]+{^}[c-d]+" "abc" = True
match "[a-c]+{^}[c-d]+" "acdc" = False
match "[a-c]+{^}[c-d]+" "ccc" = False
match "[a-c]+{^}[c-d]+" "cdc" = True
mkCompl :: StringLike s => GenRegex s -> GenRegex s Source #
Construct the Complement of an r.e.: whole set of words - r
errRegex :: StringLike s => GenRegex s -> s Source #
nullable' :: StringLike s => GenRegex s -> Nullable s Source #
firstChars :: StringLike s => GenRegex s -> CharSet Source #
FIRST for regular expressions
This is only an approximation: the real set of chars may be smaller when the expression contains intersection, set difference or exor operators.
matchWithRegex :: StringLike s => GenRegex s -> s -> Bool Source #
matchWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s) Source #
splitWithRegex :: StringLike s => GenRegex s -> s -> Maybe (SubexResults s, s) Source #
This function wraps the whole regex in a subexpression before starting the parse. This is done to get access to the whole parsed string. Therefore we need one special label: this label is the Nothing value, while all explicit labels are Just labels.
splitWithRegex' :: StringLike s => GenRegex s -> s -> Maybe (GenRegex s, s) Source #
The main scanner function
splitWithRegexCS :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (SubexResults s, s) Source #
splitWithRegexCS' :: StringLike s => GenRegex s -> CharSet -> s -> Maybe (GenRegex s, s) Source #
sped-up version of splitWithRegex'
This function checks whether the input starts with a char from FIRST re. If this is not the case, the split fails. The FIRST set can be computed once for a whole tokenizer and reused by every call of split