Safe Haskell | Safe |
---|---|
Language | Haskell2010 |
Kleene.RE
- data RE c
- empty :: RE c
- eps :: RE c
- char :: c -> RE c
- charRange :: Ord c => c -> c -> RE c
- anyChar :: Bounded c => RE c
- appends :: Eq c => [RE c] -> RE c
- unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c
- star :: Ord c => RE c -> RE c
- string :: [c] -> RE c
- nullable :: RE c -> Bool
- derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c
- transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c))
- leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c
- equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool
- generate :: (c -> c -> Gen c) -> Int -> RE c -> [[c]]
- isEmpty :: RE c -> Bool
Documentation
Regular expression
Constructors are exposed, but you should use
smart constructors in this module to construct RE
.
The Eq
and Ord
instances are structural.
The Kleene
etc constructors do "weak normalisation", so for values
constructed using those operations Eq
witnesses "weak equivalence".
See equivalent
for regular-expression equivalence.
Structure is exposed in Kleene.RE module but consider constructors as
half-internal. There are soft-invariants, but violating them shouldn't
break anything in the package. (e.g. transitionMap
will eventually
terminate, but may create more redundant states if starting regexp is not
"weakly normalised").
Constructors
REChars (RSet c) | Single character |
REAppend [RE c] | Concatenation |
REUnion (RSet c) (Set (RE c)) | Union |
REStar (RE c) | Kleene star |
Instances
(Ord c, Enum c, Bounded c) => TransitionMap c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Equivalent c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Match c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Derivate c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => FiniteKleene c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Kleene c (RE c) Source # | |
Eq c => Eq (RE c) Source # | |
Ord c => Ord (RE c) Source # | |
Show c => Show (RE c) Source # | |
(~) * c Char => IsString (RE c) Source # | |
Eq c => Semigroup (RE c) Source # | |
Eq c => Monoid (RE c) Source # | |
(Ord c, Enum c, Bounded c, Arbitrary c) => Arbitrary (RE c) Source # | |
CoArbitrary c => CoArbitrary (RE c) Source # | |
(Ord c, Enum c, Bounded c) => JoinSemiLattice (RE c) Source # | |
(Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE c) Source # | |
(~) * c Char => Pretty (RE c) Source # | |
Construction
Empty regex. Doesn't accept anything.
>>>
putPretty (empty :: RE Char)
^[]$
>>>
putPretty (bottom :: RE Char)
^[]$
match (empty :: RE Char) (s :: String) === False
Empty string. Note: different than empty
.
>>>
putPretty eps
^$
>>>
putPretty (mempty :: RE Char)
^$
match (eps :: RE Char) s === null (s :: String)
anyChar :: Bounded c => RE c Source #
Any character. Note: different than dot!
>>>
putPretty anyChar
^[^]$
appends :: Eq c => [RE c] -> RE c Source #
Concatenate regular expressions.
(asREChar r <> s) <> t === r <> (s <> t)
asREChar r <> empty === empty
empty <> asREChar r === empty
asREChar r <> eps === r
eps <> asREChar r === r
unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c Source #
Union of regular expressions.
asREChar r \/ r === r
asREChar r \/ s === s \/ r
(asREChar r \/ s) \/ t === r \/ (s \/ t)
empty \/ asREChar r === r
asREChar r \/ empty === r
everything \/ asREChar r === everything
asREChar r \/ everything === everything
star :: Ord c => RE c -> RE c Source #
Kleene star.
star (star r) === star (asREChar r)
star eps === asREChar eps
star empty === asREChar eps
star anyChar === asREChar everything
star (r \/ eps) === star (asREChar r)
star (char c \/ eps) === star (asREChar (char c))
star (empty \/ eps) === asREChar eps
string :: [c] -> RE c Source #
Literal string.
>>>
putPretty ("foobar" :: RE Char)
^foobar$
>>>
putPretty ("(.)" :: RE Char)
^\(\.\)$
Derivative
nullable :: RE c -> Bool Source #
We say that a regular expression r is nullable if the language it defines contains the empty string.
>>>
nullable eps
True
>>>
nullable (star "x")
True
>>>
nullable "foo"
False
derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c Source #
Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) with respect to a symbol \(a \in \Sigma\) is the language that includes only those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
>>>
putPretty $ derivate 'f' "foobar"
^oobar$
>>>
putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$
>>>
putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$
Transition map
transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c)) Source #
Transition map. Used to construct DFA
.
>>>
void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char)
^[]$ : \_ -> "^[]$" ^b$ : \x -> if | x <= 'a' -> "^[]$" | x <= 'b' -> "^$" | otherwise -> "^[]$" ^$ : \_ -> "^[]$" ^ab$ : \x -> if | x <= '`' -> "^[]$" | x <= 'a' -> "^b$" | otherwise -> "^[]$"
leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c Source #
Leading character sets of regular expression.
>>>
leadingChars "foo"
fromSeparators "ef"
>>>
leadingChars (star "b" <> star "e")
fromSeparators "abde"
>>>
leadingChars (charRange 'b' 'z')
fromSeparators "az"
Equivalence
equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool Source #
Whether two regexps are equivalent.
equivalent
re1 re2 = forall s.match
re1 s ===match
re1 s
>>>
let re1 = star "a" <> "a"
>>>
let re2 = "a" <> star "a"
These are different regular expressions, even we perform some normalisation-on-construction:
>>>
re1 == re2
False
They are however equivalent:
>>>
equivalent re1 re2
True
The algorithm works by executing states
on "product" regexp,
and checking whether all resulting states are both accepting or rejecting.
re1 == re2 ==> equivalent
re1 re2
More examples
>>>
let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>>
example re1 re2
^a*a$ ^aa*$ True
>>>
example (star "aa") (star "aaa")
^(aa)*$ ^(aaa)*$ False
>>>
example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$ ^(aaa)*(aa)*$ True
>>>
example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$ ^(a*b*)*$ True
Generation
Arguments
:: (c -> c -> Gen c) | character range generator |
-> Int | seed |
-> RE c | |
-> [[c]] | infinite list of results |
Generate random strings of the language RE c
describes.
>>>
let example = traverse_ print . take 3 . generate (curry QC.choose) 42
>>>
example "abc"
"abc" "abc" "abc"
>>>
example $ star $ "a" \/ "b"
"aaaaba" "bbba" "abbbbaaaa"
>>>
example empty
all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char)