Safe Haskell	Safe
Language	Haskell2010

Kleene.ERE

Contents

Construction
Derivative
Conversion
Transition map
Equivalence
Other

Synopsis

data ERE c
- = EREChars (RSet c)
- | EREAppend [ERE c]
- | EREUnion (RSet c) (Set (ERE c))
- | EREStar (ERE c)
- | ERENot (ERE c)
empty :: ERE c
eps :: ERE c
char :: c -> ERE c
charRange :: Ord c => c -> c -> ERE c
anyChar :: Bounded c => ERE c
appends :: Eq c => [ERE c] -> ERE c
unions :: (Ord c, Enum c) => [ERE c] -> ERE c
intersections :: (Ord c, Enum c) => [ERE c] -> ERE c
star :: (Ord c, Bounded c) => ERE c -> ERE c
string :: [c] -> ERE c
complement :: ERE c -> ERE c
nullable :: ERE c -> Bool
derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c
fromRE :: Ord c => RE c -> ERE c
transitionMap :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> Map (ERE c) (SF c (ERE c))
leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> Partition c
equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool
isEmpty :: ERE c -> Bool
isEverything :: ERE c -> Bool

Documentation

data ERE c Source #

Extended regular expression

It's both, Kleene and Boolean algebra. (If we add only intersections, it wouldn't be Boolean).

Note: we don't have special constructor for intersections. We use de Morgan formula \(a \land b = \neg (\neg a \lor \neg b)\).

>>> putPretty $ asEREChar $ "a" /\ "b"
^~(~a|~b)$

There is no generator, as intersections makes it hard.

Constructors

EREChars (RSet c)	Single character
EREAppend [ERE c]	Concatenation
EREUnion (RSet c) (Set (ERE c))	Union
EREStar (ERE c)	Kleene star
ERENot (ERE c)	Complement

Instances

ToLatin1 ERE Source #
Instance details Defined in Kleene.ERE Methods toLatin1 :: ERE Char -> ERE Word8 Source #
Complement c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods complement :: ERE c -> ERE c Source #
(Ord c, Enum c, Bounded c) => TransitionMap c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods transitionMap :: ERE c -> Map (ERE c) (SF c (ERE c)) Source #
(Ord c, Enum c, Bounded c) => Equivalent c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods equivalent :: ERE c -> ERE c -> Bool Source #
(Ord c, Enum c) => Match c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods match :: ERE c -> [c] -> Bool Source # match8 :: ERE c -> ByteString -> Bool Source #
(Ord c, Enum c) => Derivate c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods nullable :: ERE c -> Bool Source # derivate :: c -> ERE c -> ERE c Source #
(Ord c, Enum c, Bounded c) => FiniteKleene c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods everything :: ERE c Source # charRange :: c -> c -> ERE c Source # fromRSet :: RSet c -> ERE c Source # dot :: ERE c Source # anyChar :: ERE c Source # notChar :: c -> ERE c Source #
(Ord c, Enum c, Bounded c) => CharKleene c (ERE c) Source #
Instance details Defined in Kleene.ERE Methods char :: c -> ERE c Source # string :: [c] -> ERE c Source #
Eq c => Eq (ERE c) Source #
Instance details Defined in Kleene.ERE Methods (==) :: ERE c -> ERE c -> Bool # (/=) :: ERE c -> ERE c -> Bool #
Ord c => Ord (ERE c) Source #
Instance details Defined in Kleene.ERE Methods compare :: ERE c -> ERE c -> Ordering # (<) :: ERE c -> ERE c -> Bool # (<=) :: ERE c -> ERE c -> Bool # (>) :: ERE c -> ERE c -> Bool # (>=) :: ERE c -> ERE c -> Bool # max :: ERE c -> ERE c -> ERE c # min :: ERE c -> ERE c -> ERE c #
Show c => Show (ERE c) Source #
Instance details Defined in Kleene.ERE Methods showsPrec :: Int -> ERE c -> ShowS # show :: ERE c -> String # showList :: [ERE c] -> ShowS #
c ~ Char => IsString (ERE c) Source #
Instance details Defined in Kleene.ERE Methods fromString :: String -> ERE c #
Eq c => Semigroup (ERE c) Source #
Instance details Defined in Kleene.ERE Methods (<>) :: ERE c -> ERE c -> ERE c # sconcat :: NonEmpty (ERE c) -> ERE c # stimes :: Integral b => b -> ERE c -> ERE c #
Eq c => Monoid (ERE c) Source #
Instance details Defined in Kleene.ERE Methods mempty :: ERE c # mappend :: ERE c -> ERE c -> ERE c # mconcat :: [ERE c] -> ERE c #
(Ord c, Enum c, Bounded c, Arbitrary c) => Arbitrary (ERE c) Source #
Instance details Defined in Kleene.ERE Methods arbitrary :: Gen (ERE c) # shrink :: ERE c -> [ERE c] #
CoArbitrary c => CoArbitrary (ERE c) Source #
Instance details Defined in Kleene.ERE Methods coarbitrary :: ERE c -> Gen b -> Gen b #
(Ord c, Enum c) => Lattice (ERE c) Source #
Instance details Defined in Kleene.ERE Methods (\/) :: ERE c -> ERE c -> ERE c # (/\) :: ERE c -> ERE c -> ERE c #
(Ord c, Enum c) => BoundedJoinSemiLattice (ERE c) Source #
Instance details Defined in Kleene.ERE Methods bottom :: ERE c #
(Ord c, Enum c) => BoundedMeetSemiLattice (ERE c) Source #
Instance details Defined in Kleene.ERE Methods top :: ERE c #
c ~ Char => Pretty (ERE c) Source #
Instance details Defined in Kleene.ERE Methods pretty :: ERE c -> String Source # prettyS :: ERE c -> ShowS Source #
(Ord c, Enum c, Bounded c) => Kleene (ERE c) Source #
Instance details Defined in Kleene.ERE Methods empty :: ERE c Source # eps :: ERE c Source # appends :: [ERE c] -> ERE c Source # unions :: [ERE c] -> ERE c Source # star :: ERE c -> ERE c Source #

Construction

Binary operators are

<> for append
\/ for union
/\ for intersection

empty :: ERE c Source #

Empty regex. Doesn't accept anything.

>>> putPretty (empty :: ERE Char)
^[]$

>>> putPretty (bottom :: ERE Char)
^[]$

match (empty :: ERE Char) (s :: String) === False

eps :: ERE c Source #

Empty string. Note: different than empty.

>>> putPretty eps
^$

>>> putPretty (mempty :: ERE Char)
^$

match (eps :: ERE Char) s === null (s :: String)

char :: c -> ERE c Source #

>>> putPretty (char 'x')
^x$

charRange :: Ord c => c -> c -> ERE c Source #

>>> putPretty $ charRange 'a' 'z'
^[a-z]$

anyChar :: Bounded c => ERE c Source #

Any character. Note: different than dot!

>>> putPretty anyChar
^[^]$

appends :: Eq c => [ERE c] -> ERE c Source #

Concatenate regular expressions.

asEREChar r <> empty === empty

empty <> asEREChar r === empty

(asEREChar r <> s) <> t === r <> (s <> t)

asEREChar r <> eps === r

eps <> asEREChar r === r

unions :: (Ord c, Enum c) => [ERE c] -> ERE c Source #

Union of regular expressions.

asEREChar r \/ r === r

asEREChar r \/ s === s \/ r

(asEREChar r \/ s) \/ t === r \/ (s \/ t)

empty \/ asEREChar r === r

asEREChar r \/ empty === r

everything \/ asEREChar r === everything

asEREChar r \/ everything === everything

intersections :: (Ord c, Enum c) => [ERE c] -> ERE c Source #

Intersection of regular expressions.

asEREChar r /\ r === r

asEREChar r /\ s === s /\ r

(asEREChar r /\ s) /\ t === r /\ (s /\ t)

empty /\ asEREChar r === empty

asEREChar r /\ empty === empty

everything /\ asEREChar r === r

asEREChar r /\ everything === r

star :: (Ord c, Bounded c) => ERE c -> ERE c Source #

Kleene star.

star (star r) === star (asEREChar r)

star eps     === asEREChar eps

star empty   === asEREChar eps

star anyChar === asEREChar everything

star (asEREChar r \/ eps) === star r

star (char c \/ eps) === star (char (c :: Char))

star (empty \/ eps) === eps

string :: [c] -> ERE c Source #

Literal string.

>>> putPretty ("foobar" :: ERE Char)
^foobar$

>>> putPretty ("(.)" :: ERE Char)
^\(\.\)$

complement :: ERE c -> ERE c Source #

Complement.

complement (complement r) === asEREChar r

Derivative

nullable :: ERE c -> Bool Source #

We say that a regular expression r is nullable if the language it defines contains the empty string.

>>> nullable eps
True

>>> nullable (star "x")
True

>>> nullable "foo"
False

>>> nullable (complement eps)
False

derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c Source #

Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\) with respect to a symbol \(a \in \Sigma\) is the language that includes only those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).

>>> putPretty $ derivate 'f' "foobar"
^oobar$

>>> putPretty $ derivate 'x' $ "xyz" \/ "abc"
^yz$

>>> putPretty $ derivate 'x' $ star "xyz"
^yz(xyz)*$

Conversion

fromRE :: Ord c => RE c -> ERE c Source #

Convert from ordinary regular expression, RE.

Transition map

transitionMap :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> Map (ERE c) (SF c (ERE c)) Source #

Transition map. Used to construct DFA.

>>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: ERE Char)
^[]$ : \_ -> "^[]$"
^b$ : \x -> if
    | x <= 'a'  -> "^[]$"
    | x <= 'b'  -> "^$"
    | otherwise -> "^[]$"
^$ : \_ -> "^[]$"
^ab$ : \x -> if
    | x <= '`'  -> "^[]$"
    | x <= 'a'  -> "^b$"
    | otherwise -> "^[]$"

leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> Partition c Source #

Leading character sets of regular expression.

>>> leadingChars "foo"
fromSeparators "ef"

>>> leadingChars (star "b" <> star "e")
fromSeparators "abde"

>>> leadingChars (charRange 'b' 'z')
fromSeparators "az"

Equivalence

equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool Source #

Whether two regexps are equivalent.

equivalent re1 re2 = forall s. match re1 s == match re1 s

>>> let re1 = star "a" <> "a"
>>> let re2 = "a" <> star "a"

These are different regular expressions, even we perform some normalisation-on-construction:

>>> re1 == re2
False

They are however equivalent:

>>> equivalent re1 re2
True

The algorithm works by executing states on "product" regexp, and checking whether all resulting states are both accepting or rejecting.

re1 == re2 ==> equivalent re1 re2

More examples

>>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
>>> example re1 re2
^a*a$
^aa*$
True

>>> example (star "aa") (star "aaa")
^(aa)*$
^(aaa)*$
False

>>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
^(aa)*(aaa)*$
^(aaa)*(aa)*$
True

>>> example (star ("a" \/ "b")) (star $ star "a" <> star "b")
^[a-b]*$
^(a*b*)*$
True

Other

isEmpty :: ERE c -> Bool Source #

Whether ERE is (structurally) equal to empty.

isEverything :: ERE c -> Bool Source #

Whether ERE is (structurally) equal to everything.