Safe Haskell | Safe |
---|---|
Language | Haskell2010 |
- data ERE c
- empty :: ERE c
- eps :: ERE c
- char :: c -> ERE c
- charRange :: Ord c => c -> c -> ERE c
- anyChar :: Bounded c => ERE c
- appends :: Eq c => [ERE c] -> ERE c
- unions :: (Ord c, Enum c) => [ERE c] -> ERE c
- intersections :: (Ord c, Enum c) => [ERE c] -> ERE c
- star :: (Ord c, Bounded c) => ERE c -> ERE c
- string :: [c] -> ERE c
- complement :: ERE c -> ERE c
- nullable :: ERE c -> Bool
- derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c
- transitionMap :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> Map (ERE c) (SF c (ERE c))
- leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> Partition c
- equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool
- isEmpty :: ERE c -> Bool
- isEverything :: ERE c -> Bool
Documentation
Extended regular expression
It's both, Kleene and Boolean algebra. (If we add only intersections, it wouldn't be Boolean).
Note: we don't have special constructor for intersections. We use de Morgan formula \(a \land b = \neg (\neg a \lor \neg b)\).
>>>
putPretty $ asEREChar $ "a" /\ "b"
^~(~a|~b)$
There is no generator, as intersections
makes it hard.
EREChars (RSet c) | Single character |
EREAppend [ERE c] | Concatenation |
EREUnion (RSet c) (Set (ERE c)) | Union |
EREStar (ERE c) | Kleene star |
ERENot (ERE c) | Complement |
Construction
Empty regex. Doesn't accept anything.
>>>
putPretty (empty :: ERE Char)
^[]$
>>>
putPretty (bottom :: ERE Char)
^[]$
match (empty :: ERE Char) (s :: String) === False
Empty string. Note: different than empty
.
>>>
putPretty eps
^$
>>>
putPretty (mempty :: ERE Char)
^$
match (eps :: ERE Char) s === null (s :: String)
anyChar :: Bounded c => ERE c Source #
Any character. Note: different than dot!
>>>
putPretty anyChar
^[^]$
appends :: Eq c => [ERE c] -> ERE c Source #
Concatenate regular expressions.
asEREChar r <> empty === empty
empty <> asEREChar r === empty
(asEREChar r <> s) <> t === r <> (s <> t)
asEREChar r <> eps === r
eps <> asEREChar r === r
unions :: (Ord c, Enum c) => [ERE c] -> ERE c Source #
Union of regular expressions.
asEREChar r \/ r === r
asEREChar r \/ s === s \/ r
(asEREChar r \/ s) \/ t === r \/ (s \/ t)
empty \/ asEREChar r === r
asEREChar r \/ empty === r
everything \/ asREChar r === everything
asREChar r \/ everything === everything
intersections :: (Ord c, Enum c) => [ERE c] -> ERE c Source #
Intersection of regular expressions.
asEREChar r /\ r === r
asEREChar r /\ s === s /\ r
(asEREChar r /\ s) /\ t === r /\ (s /\ t)
empty /\ asEREChar r === empty
asEREChar r /\ empty === empty
everything /\ asREChar r === r
asREChar r /\ everything === r
star :: (Ord c, Bounded c) => ERE c -> ERE c Source #
Kleene star.
star (star r) === star (asEREChar r)
star eps === asEREChar eps
star empty === asEREChar eps
star anyChar === asEREChar everything
star (asREChar r \/ eps) === star r
star (char c \/ eps) === star (char (c :: Char))
star (empty \/ eps) === eps
string :: [c] -> ERE c Source #
Literal string.
>>>
putPretty ("foobar" :: ERE Char)
^foobar$
>>>
putPretty ("(.)" :: ERE Char)
^\(\.\)$
complement :: ERE c -> ERE c Source #
Complement.
complement (complement r) === asEREChar r
Derivative
nullable :: ERE c -> Bool Source #
We say that a regular expression r is nullable if the language it defines contains the empty string.
>>>
nullable eps
True
>>>
nullable (star "x")
True
>>>
nullable "foo"
False
>>>
nullable (complement eps)
False
derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c Source #
Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) with respect to a symbol \(a \in \Sigma\) is the language that includes only those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
>>>
putPretty $ derivate 'f' "foobar"
^oobar$
>>>
putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$
>>>
putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$
Transition map
transitionMap :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> Map (ERE c) (SF c (ERE c)) Source #
Transition map. Used to construct DFA
.
>>>
void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: ERE Char)
^[]$ : \_ -> "^[]$" ^b$ : \x -> if | x <= 'a' -> "^[]$" | x <= 'b' -> "^$" | otherwise -> "^[]$" ^$ : \_ -> "^[]$" ^ab$ : \x -> if | x <= '`' -> "^[]$" | x <= 'a' -> "^b$" | otherwise -> "^[]$"
leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> Partition c Source #
Leading character sets of regular expression.
>>>
leadingChars "foo"
fromSeparators "ef"
>>>
leadingChars (star "b" <> star "e")
fromSeparators "abde"
>>>
leadingChars (charRange 'b' 'z')
fromSeparators "az"
Equivalence
equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool Source #
Whether two regexps are equivalent.
equivalent
re1 re2 = forall s.match
re1 s ==match
re1 s
>>>
let re1 = star "a" <> "a"
>>>
let re2 = "a" <> star "a"
These are different regular expressions, even we perform some normalisation-on-construction:
>>>
re1 == re2
False
They are however equivalent:
>>>
equivalent re1 re2
True
The algorithm works by executing states
on "product" regexp,
and checking whether all resulting states are both accepting or rejecting.
re1 == re2 ==> equivalent
re1 re2
More examples
>>>
let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>>
example re1 re2
^a*a$ ^aa*$ True
>>>
example (star "aa") (star "aaa")
^(aa)*$ ^(aaa)*$ False
>>>
example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$ ^(aaa)*(aa)*$ True
>>>
example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$ ^(a*b*)*$ True