Safe Haskell	Safe
Language	Haskell2010

Kleene.RE

Contents

Construction
Derivative
Transition map
Equivalence
Generation
Other

Synopsis

Documentation

data RE c Source #

Regular expression

Constructors are exposed, but you should use smart constructors in this module to construct RE.

The Eq and Ord instances are structural. The Kleene etc constructors do "weak normalisation", so for values constructed using those operations Eq witnesses "weak equivalence". See equivalent for regular-expression equivalence.

Structure is exposed in Kleene.RE module but consider constructors as half-internal. There are soft-invariants, but violating them shouldn't break anything in the package. (e.g. transitionMap will eventually terminate, but may create more redundant states if starting regexp is not "weakly normalised").

Constructors

REChars (RSet c)	Single character
REAppend [RE c]	Concatenation
REUnion (RSet c) (Set (RE c))	Union
REStar (RE c)	Kleene star

Instances

(Ord c, Enum c, Bounded c) => TransitionMap c (RE c) Source #
Methods transitionMap :: RE c -> Map (RE c) (SF c (RE c)) Source #
(Ord c, Enum c, Bounded c) => Equivalent c (RE c) Source #
Methods equivalent :: RE c -> RE c -> Bool Source #
(Ord c, Enum c, Bounded c) => Match c (RE c) Source #
Methods match :: RE c -> [c] -> Bool Source #
(Ord c, Enum c, Bounded c) => Derivate c (RE c) Source #
Methods nullable :: RE c -> Bool Source # derivate :: c -> RE c -> RE c Source #
(Ord c, Enum c, Bounded c) => FiniteKleene c (RE c) Source #
Methods everything :: RE c Source # charRange :: c -> c -> RE c Source # fromRSet :: RSet c -> RE c Source # dot :: RE c Source # anyChar :: RE c Source #
(Ord c, Enum c, Bounded c) => Kleene c (RE c) Source #
Methods empty :: RE c Source # eps :: RE c Source # char :: c -> RE c Source # appends :: [RE c] -> RE c Source # unions :: [RE c] -> RE c Source # star :: RE c -> RE c Source #
Eq c => Eq (RE c) Source #
Methods (==) :: RE c -> RE c -> Bool # (/=) :: RE c -> RE c -> Bool #
Ord c => Ord (RE c) Source #
Methods compare :: RE c -> RE c -> Ordering # (<) :: RE c -> RE c -> Bool # (<=) :: RE c -> RE c -> Bool # (>) :: RE c -> RE c -> Bool # (>=) :: RE c -> RE c -> Bool # max :: RE c -> RE c -> RE c # min :: RE c -> RE c -> RE c #
Show c => Show (RE c) Source #
Methods showsPrec :: Int -> RE c -> ShowS # show :: RE c -> String # showList :: [RE c] -> ShowS #
(~) * c Char => IsString (RE c) Source #
Methods fromString :: String -> RE c #
Eq c => Semigroup (RE c) Source #
Methods (<>) :: RE c -> RE c -> RE c # sconcat :: NonEmpty (RE c) -> RE c # stimes :: Integral b => b -> RE c -> RE c #
Eq c => Monoid (RE c) Source #
Methods mempty :: RE c # mappend :: RE c -> RE c -> RE c # mconcat :: [RE c] -> RE c #
(Ord c, Enum c, Bounded c, Arbitrary c) => Arbitrary (RE c) Source #
Methods arbitrary :: Gen (RE c) # shrink :: RE c -> [RE c] #
CoArbitrary c => CoArbitrary (RE c) Source #
Methods coarbitrary :: RE c -> Gen b -> Gen b #
(Ord c, Enum c, Bounded c) => JoinSemiLattice (RE c) Source #
Methods (\/) :: RE c -> RE c -> RE c # join :: RE c -> RE c -> RE c #
(Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE c) Source #
Methods bottom :: RE c #
(~) * c Char => Pretty (RE c) Source #
Methods pretty :: RE c -> String Source # prettyS :: RE c -> ShowS Source #

Construction

Binary operators are

<> for append
\/ for union

empty :: RE c Source #

Empty regex. Doesn't accept anything.

>>> putPretty (empty :: RE Char)
^[]$

>>> putPretty (bottom :: RE Char)
^[]$

match (empty :: RE Char) (s :: String) === False

eps :: RE c Source #

Empty string. Note: different than empty.

>>> putPretty eps
^$

>>> putPretty (mempty :: RE Char)
^$

match (eps :: RE Char) s === null (s :: String)

char :: c -> RE c Source #

>>> putPretty (char 'x')
^x$

charRange :: Ord c => c -> c -> RE c Source #

>>> putPretty $ charRange 'a' 'z'
^[a-z]$

anyChar :: Bounded c => RE c Source #

Any character. Note: different than dot!

>>> putPretty anyChar
^[^]$

appends :: Eq c => [RE c] -> RE c Source #

Concatenate regular expressions.

(asREChar r <> s) <> t === r <> (s <> t)

asREChar r <> empty === empty

empty <> asREChar r === empty

asREChar r <> eps === r

eps <> asREChar r === r

unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c Source #

Union of regular expressions.

asREChar r \/ r === r

asREChar r \/ s === s \/ r

(asREChar r \/ s) \/ t === r \/ (s \/ t)

empty \/ asREChar r === r

asREChar r \/ empty === r

everything \/ asREChar r === everything

asREChar r \/ everything === everything

star :: Ord c => RE c -> RE c Source #

Kleene star.

star (star r) === star (asREChar r)

star eps     === asREChar eps

star empty   === asREChar eps

star anyChar === asREChar everything

star (r      \/ eps) === star (asREChar r)

star (char c \/ eps) === star (asREChar (char c))

star (empty  \/ eps) === asREChar eps

string :: [c] -> RE c Source #

Literal string.

>>> putPretty ("foobar" :: RE Char)
^foobar$

>>> putPretty ("(.)" :: RE Char)
^\(\.\)$

Derivative

nullable :: RE c -> Bool Source #

We say that a regular expression r is nullable if the language it defines contains the empty string.

>>> nullable eps
True

>>> nullable (star "x")
True

>>> nullable "foo"
False

derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c Source #

Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) with respect to a symbol \(a \in \Sigma\) is the language that includes only those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).

>>> putPretty $ derivate 'f' "foobar"
^oobar$

>>> putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$

>>> putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$

Transition map

transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c)) Source #

Transition map. Used to construct DFA.

>>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char)
^[]$ : \_ -> "^[]$"
^b$ : \x -> if
    | x <= 'a'  -> "^[]$"
    | x <= 'b'  -> "^$"
    | otherwise -> "^[]$"
^$ : \_ -> "^[]$"
^ab$ : \x -> if
    | x <= '`'  -> "^[]$"
    | x <= 'a'  -> "^b$"
    | otherwise -> "^[]$"

leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c Source #

Leading character sets of regular expression.

>>> leadingChars "foo"
fromSeparators "ef"

>>> leadingChars (star "b" <> star "e")
fromSeparators "abde"

>>> leadingChars (charRange 'b' 'z')
fromSeparators "az"

Equivalence

equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool Source #

Whether two regexps are equivalent.

equivalent re1 re2 = forall s. match re1 s === match re1 s

>>> let re1 = star "a" <> "a"
>>> let re2 = "a" <> star "a"

These are different regular expressions, even we perform some normalisation-on-construction:

>>> re1 == re2
False

They are however equivalent:

>>> equivalent re1 re2
True

The algorithm works by executing states on "product" regexp, and checking whether all resulting states are both accepting or rejecting.

re1 == re2 ==> equivalent re1 re2

More examples

>>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>> example re1 re2
^a*a$
^aa*$
True

>>> example (star "aa") (star "aaa")
^(aa)*$
^(aaa)*$
False

>>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$
^(aaa)*(aa)*$
True

>>> example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$
^(a*b*)*$
True

Generation

generate Source #

Arguments

:: (c -> c -> Gen c)	character range generator
-> Int	seed
-> RE c
-> [[c]]	infinite list of results

Generate random strings of the language RE c describes.

>>> let example = traverse_ print . take 3 . generate (curry QC.choose) 42
>>> example "abc"
"abc"
"abc"
"abc"

>>> example $ star $ "a" \/ "b"
"aaaaba"
"bbba"
"abbbbaaaa"

>>> example empty

all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char)

Other

isEmpty :: RE c -> Bool Source #

Whether RE is (structurally) equal to empty.

isEmpty r === all (not . nullable) (Map.keys $ transitionMap $ asREChar r)