Safe Haskell	Safe
Language	Haskell2010

Kleene.RE

Contents

Construction
Derivative
Transition map
Equivalence
Generation
Other

Synopsis

data RE c
- = REChars (RSet c)
- | REAppend [RE c]
- | REUnion (RSet c) (Set (RE c))
- | REStar (RE c)
empty :: RE c
eps :: RE c
char :: c -> RE c
charRange :: Ord c => c -> c -> RE c
anyChar :: Bounded c => RE c
appends :: Eq c => [RE c] -> RE c
unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c
star :: Ord c => RE c -> RE c
string :: [c] -> RE c
nullable :: RE c -> Bool
derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c
transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c))
leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c
equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool
generate :: (c -> c -> Gen c) -> Int -> RE c -> [[c]]
isEmpty :: RE c -> Bool
nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c)

Documentation

data RE c Source #

Regular expression

Constructors are exposed, but you should use smart constructors in this module to construct RE.

The Eq and Ord instances are structural. The Kleene etc constructors do "weak normalisation", so for values constructed using those operations Eq witnesses "weak equivalence". See equivalent for regular-expression equivalence.

Structure is exposed in Kleene.RE module but consider constructors as half-internal. There are soft-invariants, but violating them shouldn't break anything in the package. (e.g. transitionMap will eventually terminate, but may create more redundant states if starting regexp is not "weakly normalised").

Constructors

REChars (RSet c)	Single character
REAppend [RE c]	Concatenation
REUnion (RSet c) (Set (RE c))	Union
REStar (RE c)	Kleene star

Instances

ToLatin1 RE Source #
Instance details Defined in Kleene.Internal.RE Methods toLatin1 :: RE Char -> RE Word8 Source #
(Ord c, Enum c, Bounded c) => Complement c (RE c) Source #
Instance details Defined in Kleene.DFA Methods complement :: RE c -> RE c Source #
(Ord c, Enum c, Bounded c) => TransitionMap c (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods transitionMap :: RE c -> Map (RE c) (SF c (RE c)) Source #
(Ord c, Enum c, Bounded c) => Equivalent c (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods equivalent :: RE c -> RE c -> Bool Source #
(Ord c, Enum c, Bounded c) => Match c (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods match :: RE c -> [c] -> Bool Source # match8 :: RE c -> ByteString -> Bool Source #
(Ord c, Enum c, Bounded c) => Derivate c (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods nullable :: RE c -> Bool Source # derivate :: c -> RE c -> RE c Source #
(Ord c, Enum c, Bounded c) => FiniteKleene c (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods everything :: RE c Source # charRange :: c -> c -> RE c Source # fromRSet :: RSet c -> RE c Source # dot :: RE c Source # anyChar :: RE c Source # notChar :: c -> RE c Source #
(Ord c, Enum c, Bounded c) => CharKleene c (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods char :: c -> RE c Source # string :: [c] -> RE c Source #
Eq c => Eq (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods (==) :: RE c -> RE c -> Bool # (/=) :: RE c -> RE c -> Bool #
Ord c => Ord (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods compare :: RE c -> RE c -> Ordering # (<) :: RE c -> RE c -> Bool # (<=) :: RE c -> RE c -> Bool # (>) :: RE c -> RE c -> Bool # (>=) :: RE c -> RE c -> Bool # max :: RE c -> RE c -> RE c # min :: RE c -> RE c -> RE c #
Show c => Show (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods showsPrec :: Int -> RE c -> ShowS # show :: RE c -> String # showList :: [RE c] -> ShowS #
c ~ Char => IsString (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods fromString :: String -> RE c #
Eq c => Semigroup (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods (<>) :: RE c -> RE c -> RE c # sconcat :: NonEmpty (RE c) -> RE c # stimes :: Integral b => b -> RE c -> RE c #
Eq c => Monoid (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods mempty :: RE c # mappend :: RE c -> RE c -> RE c # mconcat :: [RE c] -> RE c #
(Ord c, Enum c, Bounded c, Arbitrary c) => Arbitrary (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods arbitrary :: Gen (RE c) # shrink :: RE c -> [RE c] #
CoArbitrary c => CoArbitrary (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods coarbitrary :: RE c -> Gen b -> Gen b #
(Ord c, Enum c, Bounded c) => Lattice (RE c) Source #	WARNING: The `/\` is inefficient, it actually computes the conjunction: `>>>` `putPretty $ asREChar $ "a" /\ "b"`^[]$ `>>>` `putPretty $ asREChar $ "a" /\ star "a"`^a$ `>>>` `putPretty $ asREChar $ star "aa" /\ star "aaa"`^(a(aaaaaa)*aaaaa)?$
Instance details Defined in Kleene.DFA Methods (\/) :: RE c -> RE c -> RE c # (/\) :: RE c -> RE c -> RE c #
(Ord c, Enum c, Bounded c) => BoundedJoinSemiLattice (RE c) Source #
Instance details Defined in Kleene.DFA Methods bottom :: RE c #
(Ord c, Enum c, Bounded c) => BoundedMeetSemiLattice (RE c) Source #
Instance details Defined in Kleene.DFA Methods top :: RE c #
c ~ Char => Pretty (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods pretty :: RE c -> String Source # prettyS :: RE c -> ShowS Source #
(Ord c, Enum c, Bounded c) => Kleene (RE c) Source #
Instance details Defined in Kleene.Internal.RE Methods empty :: RE c Source # eps :: RE c Source # appends :: [RE c] -> RE c Source # unions :: [RE c] -> RE c Source # star :: RE c -> RE c Source #

Construction

Binary operators are

<> for append
\/ for union

empty :: RE c Source #

Empty regex. Doesn't accept anything.

>>> putPretty (empty :: RE Char)
^[]$

>>> putPretty (bottom :: RE Char)
^[]$

match (empty :: RE Char) (s :: String) === False

eps :: RE c Source #

Empty string. Note: different than empty.

>>> putPretty eps
^$

>>> putPretty (mempty :: RE Char)
^$

match (eps :: RE Char) s === null (s :: String)

char :: c -> RE c Source #

>>> putPretty (char 'x')
^x$

charRange :: Ord c => c -> c -> RE c Source #

>>> putPretty $ charRange 'a' 'z'
^[a-z]$

anyChar :: Bounded c => RE c Source #

Any character. Note: different than dot!

>>> putPretty anyChar
^[^]$

appends :: Eq c => [RE c] -> RE c Source #

Concatenate regular expressions.

(asREChar r <> s) <> t === r <> (s <> t)

asREChar r <> empty === empty

empty <> asREChar r === empty

asREChar r <> eps === r

eps <> asREChar r === r

unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c Source #

Union of regular expressions.

asREChar r \/ r === r

asREChar r \/ s === s \/ r

(asREChar r \/ s) \/ t === r \/ (s \/ t)

empty \/ asREChar r === r

asREChar r \/ empty === r

everything \/ asREChar r === everything

asREChar r \/ everything === everything

star :: Ord c => RE c -> RE c Source #

Kleene star.

star (star r) === star (asREChar r)

star eps     === asREChar eps

star empty   === asREChar eps

star anyChar === asREChar everything

star (r      \/ eps) === star (asREChar r)

star (char c \/ eps) === star (asREChar (char c))

star (empty  \/ eps) === asREChar eps

string :: [c] -> RE c Source #

Literal string.

>>> putPretty ("foobar" :: RE Char)
^foobar$

>>> putPretty ("(.)" :: RE Char)
^\(\.\)$

Derivative

nullable :: RE c -> Bool Source #

We say that a regular expression r is nullable if the language it defines contains the empty string.

>>> nullable eps
True

>>> nullable (star "x")
True

>>> nullable "foo"
False

derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c Source #

Intuitively, the derivative of a language $\mathcal{L} \subset \Sigma^\star$ with respect to a symbol $a \in \Sigma$ is the language that includes only those suffixes of strings with a leading symbol $a$ in $\mathcal{L}$.

>>> putPretty $ derivate 'f' "foobar"
^oobar$

>>> putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$

>>> putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$

Transition map

transitionMap :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Map (RE c) (SF c (RE c)) Source #

Transition map. Used to construct DFA.

>>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: RE Char)
^[]$ : \_ -> "^[]$"
^b$ : \x -> if
    | x <= 'a'  -> "^[]$"
    | x <= 'b'  -> "^$"
    | otherwise -> "^[]$"
^$ : \_ -> "^[]$"
^ab$ : \x -> if
    | x <= '`'  -> "^[]$"
    | x <= 'a'  -> "^b$"
    | otherwise -> "^[]$"

leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> Partition c Source #

Leading character sets of regular expression.

>>> leadingChars "foo"
fromSeparators "ef"

>>> leadingChars (star "b" <> star "e")
fromSeparators "abde"

>>> leadingChars (charRange 'b' 'z')
fromSeparators "az"

Equivalence

equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool Source #

Whether two regexps are equivalent.

equivalent re1 re2 = forall s. match re1 s === match re1 s

>>> let re1 = star "a" <> "a"
>>> let re2 = "a" <> star "a"

These are different regular expressions, even we perform some normalisation-on-construction:

>>> re1 == re2
False

They are however equivalent:

>>> equivalent re1 re2
True

The algorithm works by executing states on "product" regexp, and checking whether all resulting states are both accepting or rejecting.

re1 == re2 ==> equivalent re1 re2

More examples

>>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>> example re1 re2
^a*a$
^aa*$
True

>>> example (star "aa") (star "aaa")
^(aa)*$
^(aaa)*$
False

>>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$
^(aaa)*(aa)*$
True

>>> example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$
^(a*b*)*$
True

Generation

generate Source #

Arguments

:: (c -> c -> Gen c)	character range generator
-> Int	seed
-> RE c
-> [[c]]	infinite list of results

Generate random strings of the language RE c describes.

>>> let example = traverse_ print . take 3 . generate (curry QC.choose) 42
>>> example "abc"
"abc"
"abc"
"abc"

>>> example $ star $ "a" \/ "b"
"aaaaba"
"bbba"
"abbbbaaaa"

>>> example empty

all (match r) $ take 10 $ generate (curry QC.choose) 42 (r :: RE Char)

Other

isEmpty :: RE c -> Bool Source #

Whether RE is (structurally) equal to empty.

isEmpty r === all (not . nullable) (Map.keys $ transitionMap $ asREChar r)

nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c) Source #

Not only we can decide whether RE is nullable, we can also remove the empty string:

>>> putPretty $ nullableProof eps
^[]$

>>> putPretty $ nullableProof $ star "x"
^xx*$

>>> putPretty $ nullableProof "foo"
Nothing

nullableProof is consistent with nullable:

isJust (nullableProof r) === nullable (asREChar r)

The returned regular expression is not nullable:

maybe True (not . nullable) $ nullableProof $ asREChar r

If we union with empty regex, we get a equivalent regular expression we started with:

maybe r (eps \/) (nullableProof r) `equivalent` (asREChar r)