Safe Haskell | Safe |
---|---|
Language | Haskell2010 |
Kleene.RE
Synopsis
- data RE c
- empty :: RE c
- eps :: RE c
- char :: c -> RE c
- charRange :: Ord c => c -> c -> RE c
- anyChar :: Bounded c => RE c
- appends :: Eq c => [RE c] -> RE c
- unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c
- star :: Ord c => RE c -> RE c
- string :: [c] -> RE c
- nullable :: RE c -> Bool
- derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c
- transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c))
- leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c
- equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool
- generate :: (c -> c -> Gen c) -> Int -> RE c -> [[c]]
- isEmpty :: RE c -> Bool
- nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c)
Documentation
Regular expression
Constructors are exposed, but you should use
smart constructors in this module to construct RE
.
The Eq
and Ord
instances are structural.
The Kleene
etc constructors do "weak normalisation", so for values
constructed using those operations Eq
witnesses "weak equivalence".
See equivalent
for regular-expression equivalence.
Structure is exposed in Kleene.RE module but consider constructors as
half-internal. There are soft-invariants, but violating them shouldn't
break anything in the package. (e.g. transitionMap
will eventually
terminate, but may create more redundant states if starting regexp is not
"weakly normalised").
Constructors
REChars (RSet c) | Single character |
REAppend [RE c] | Concatenation |
REUnion (RSet c) (Set (RE c)) | Union |
REStar (RE c) | Kleene star |
Instances
ToLatin1 RE Source # | |
(Ord c, Enum c, Bounded c) => Complement c (RE c) Source # | |
Defined in Kleene.DFA Methods complement :: RE c -> RE c Source # | |
(Ord c, Enum c, Bounded c) => TransitionMap c (RE c) Source # | |
Defined in Kleene.Internal.RE | |
(Ord c, Enum c, Bounded c) => Equivalent c (RE c) Source # | |
Defined in Kleene.Internal.RE | |
(Ord c, Enum c, Bounded c) => Match c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Derivate c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => FiniteKleene c (RE c) Source # | |
(Ord c, Enum c, Bounded c) => CharKleene c (RE c) Source # | |
Eq c => Eq (RE c) Source # | |
Ord c => Ord (RE c) Source # | |
Show c => Show (RE c) Source # | |
c ~ Char => IsString (RE c) Source # | |
Defined in Kleene.Internal.RE Methods fromString :: String -> RE c # | |
Eq c => Semigroup (RE c) Source # | |
Eq c => Monoid (RE c) Source # | |
(Ord c, Enum c, Bounded c, Arbitrary c) => Arbitrary (RE c) Source # | |
CoArbitrary c => CoArbitrary (RE c) Source # | |
Defined in Kleene.Internal.RE Methods coarbitrary :: RE c -> Gen b -> Gen b # | |
(Ord c, Enum c, Bounded c) => Lattice (RE c) Source # | WARNING: The
|
(Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE c) Source # | |
Defined in Kleene.DFA | |
(Ord c, Enum c, Bounded c) => BoundedMeetSemiLattice (RE c) Source # | |
Defined in Kleene.DFA | |
c ~ Char => Pretty (RE c) Source # | |
(Ord c, Enum c, Bounded c) => Kleene (RE c) Source # | |
Construction
Binary operators are
<>
for append\/
for union
Empty regex. Doesn't accept anything.
>>>
putPretty (empty :: RE Char)
^[]$
>>>
putPretty (bottom :: RE Char)
^[]$
match (empty :: RE Char) (s :: String) === False
Empty string. Note: different than empty
.
>>>
putPretty eps
^$
>>>
putPretty (mempty :: RE Char)
^$
match (eps :: RE Char) s === null (s :: String)
anyChar :: Bounded c => RE c Source #
Any character. Note: different than dot!
>>>
putPretty anyChar
^[^]$
appends :: Eq c => [RE c] -> RE c Source #
Concatenate regular expressions.
(asREChar r <> s) <> t === r <> (s <> t)
asREChar r <> empty === empty
empty <> asREChar r === empty
asREChar r <> eps === r
eps <> asREChar r === r
unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c Source #
Union of regular expressions.
asREChar r \/ r === r
asREChar r \/ s === s \/ r
(asREChar r \/ s) \/ t === r \/ (s \/ t)
empty \/ asREChar r === r
asREChar r \/ empty === r
everything \/ asREChar r === everything
asREChar r \/ everything === everything
star :: Ord c => RE c -> RE c Source #
Kleene star.
star (star r) === star (asREChar r)
star eps === asREChar eps
star empty === asREChar eps
star anyChar === asREChar everything
star (r \/ eps) === star (asREChar r)
star (char c \/ eps) === star (asREChar (char c))
star (empty \/ eps) === asREChar eps
string :: [c] -> RE c Source #
Literal string.
>>>
putPretty ("foobar" :: RE Char)
^foobar$
>>>
putPretty ("(.)" :: RE Char)
^\(\.\)$
Derivative
nullable :: RE c -> Bool Source #
We say that a regular expression r is nullable if the language it defines contains the empty string.
>>>
nullable eps
True
>>>
nullable (star "x")
True
>>>
nullable "foo"
False
derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c Source #
Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) with respect to a symbol \(a \in \Sigma\) is the language that includes only those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
>>>
putPretty $ derivate 'f' "foobar"
^oobar$
>>>
putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$
>>>
putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$
Transition map
transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c)) Source #
Transition map. Used to construct DFA
.
>>>
void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char)
^[]$ : \_ -> "^[]$" ^b$ : \x -> if | x <= 'a' -> "^[]$" | x <= 'b' -> "^$" | otherwise -> "^[]$" ^$ : \_ -> "^[]$" ^ab$ : \x -> if | x <= '`' -> "^[]$" | x <= 'a' -> "^b$" | otherwise -> "^[]$"
leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c Source #
Leading character sets of regular expression.
>>>
leadingChars "foo"
fromSeparators "ef"
>>>
leadingChars (star "b" <> star "e")
fromSeparators "abde"
>>>
leadingChars (charRange 'b' 'z')
fromSeparators "az"
Equivalence
equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool Source #
Whether two regexps are equivalent.
equivalent
re1 re2 = forall s.match
re1 s ===match
re1 s
>>>
let re1 = star "a" <> "a"
>>>
let re2 = "a" <> star "a"
These are different regular expressions, even we perform some normalisation-on-construction:
>>>
re1 == re2
False
They are however equivalent:
>>>
equivalent re1 re2
True
The algorithm works by executing states
on "product" regexp,
and checking whether all resulting states are both accepting or rejecting.
re1 == re2 ==> equivalent
re1 re2
More examples
>>>
let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>>
example re1 re2
^a*a$ ^aa*$ True
>>>
example (star "aa") (star "aaa")
^(aa)*$ ^(aaa)*$ False
>>>
example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$ ^(aaa)*(aa)*$ True
>>>
example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$ ^(a*b*)*$ True
Generation
Arguments
:: (c -> c -> Gen c) | character range generator |
-> Int | seed |
-> RE c | |
-> [[c]] | infinite list of results |
Generate random strings of the language RE c
describes.
>>>
let example = traverse_ print . take 3 . generate (curry QC.choose) 42
>>>
example "abc"
"abc" "abc" "abc"
>>>
example $ star $ "a" \/ "b"
"aaaaba" "bbba" "abbbbaaaa"
>>>
example empty
all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char)
Other
nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c) Source #
Not only we can decide whether RE
is nullable, we can also
remove the empty string:
>>>
putPretty $ nullableProof eps
^[]$
>>>
putPretty $ nullableProof $ star "x"
^xx*$
>>>
putPretty $ nullableProof "foo"
Nothing
nullableProof
is consistent with nullable
:
isJust (nullableProof r) === nullable (asREChar r)
The returned regular expression is not nullable:
maybe True (not . nullable) $ nullableProof $ asREChar r
If we union with empty regex, we get a equivalent regular expression we started with:
maybe r (eps \/) (nullableProof r) `equivalent` (asREChar r)