{-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE CPP #-} module Text.Regex.Applicative.Types where import Control.Applicative import Control.Monad ((<=<)) import Data.Filtrable (Filtrable (..)) import Data.Functor.Identity (Identity (..)) import Data.String #if !MIN_VERSION_base(4,11,0) import Data.Semigroup #endif newtype ThreadId = ThreadId Int -- | A thread either is a result or corresponds to a symbol in the regular -- expression, which is expected by that thread. data Thread s r = Thread { threadId_ :: ThreadId , _threadCont :: s -> [Thread s r] } | Accept r -- | Returns thread identifier. This will be 'Just' for ordinary threads and -- 'Nothing' for results. threadId :: Thread s r -> Maybe ThreadId threadId Thread { threadId_ = i } = Just i threadId _ = Nothing data Greediness = Greedy | NonGreedy deriving (Show, Read, Eq, Ord, Enum) -- | Type of regular expressions that recognize symbols of type @s@ and -- produce a result of type @a@. -- -- Regular expressions can be built using 'Functor', 'Applicative', -- 'Alternative', and 'Filtrable' instances in the following natural way: -- -- * @f@ '<$>' @ra@ matches iff @ra@ matches, and its return value is the result -- of applying @f@ to the return value of @ra@. -- -- * 'pure' @x@ matches the empty string (i.e. it does not consume any symbols), -- and its return value is @x@ -- -- * @rf@ '<*>' @ra@ matches a string iff it is a concatenation of two -- strings: one matched by @rf@ and the other matched by @ra@. The return value -- is @f a@, where @f@ and @a@ are the return values of @rf@ and @ra@ -- respectively. -- -- * @ra@ '<|>' @rb@ matches a string which is accepted by either @ra@ or @rb@. -- It is left-biased, so if both can match, the result of @ra@ is used. -- -- * 'empty' is a regular expression which does not match any string. -- -- * 'many' @ra@ matches concatenation of zero or more strings matched by @ra@ -- and returns the list of @ra@'s return values on those strings. -- -- * 'some' @ra@ matches concatenation of one or more strings matched by @ra@ -- and returns the list of @ra@'s return values on those strings. -- -- * 'catMaybes' @ram@ matches iff @ram@ matches and produces 'Just _'. -- -- * @ra@ '<>' @rb@ matches @ra@ followed by @rb@. The return value is @a <> b@, -- where @a@ and @b@ are the return values of @ra@ and @rb@ respectively. -- (See <https://github.com/feuerbach/regex-applicative/issues/37#issue-499781703> -- for an example usage.) -- -- * 'mempty' matches the empty string (i.e. it does not consume any symbols), -- and its return value is the 'mempty' value of type @a@. data RE s a where Eps :: RE s () Symbol :: ThreadId -> (s -> Maybe a) -> RE s a Alt :: RE s a -> RE s a -> RE s a App :: RE s (a -> b) -> RE s a -> RE s b Fmap :: (a -> b) -> RE s a -> RE s b CatMaybes :: RE s (Maybe a) -> RE s a Fail :: RE s a Rep :: Greediness -- repetition may be greedy or not -> (b -> a -> b) -- folding function (like in foldl) -> b -- the value for zero matches, and also the initial value -- for the folding function -> RE s a -> RE s b Void :: RE s a -> RE s () -- | Traverse each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order. traversePostorder :: forall s a m . Monad m => (forall a . RE s a -> m (RE s a)) -> RE s a -> m (RE s a) traversePostorder f = go where go :: forall a . RE s a -> m (RE s a) go = f <=< \ case Eps -> pure Eps Symbol i p -> pure (Symbol i p) Alt a b -> Alt <$> go a <*> go b App a b -> App <$> go a <*> go b Fmap g a -> Fmap g <$> go a CatMaybes a -> CatMaybes <$> go a Fail -> pure Fail Rep greed g b a -> Rep greed g b <$> go a Void a -> Void <$> go a -- | Fold each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order. foldMapPostorder :: Monoid b => (forall a . RE s a -> b) -> RE s a -> b foldMapPostorder f = fst . traversePostorder ((,) <$> f <*> id) -- | Map each (reflexive, transitive) subexpression of a 'RE'. mapRE :: (forall a . RE s a -> RE s a) -> RE s a -> RE s a mapRE f = runIdentity . traversePostorder (Identity . f) instance Functor (RE s) where fmap f x = Fmap f x f <$ x = pure f <* x instance Applicative (RE s) where pure x = const x <$> Eps a1 <*> a2 = App a1 a2 a *> b = pure (const id) <*> Void a <*> b a <* b = pure const <*> a <*> Void b instance Alternative (RE s) where a1 <|> a2 = Alt a1 a2 empty = Fail many a = reverse <$> Rep Greedy (flip (:)) [] a some a = (:) <$> a <*> many a -- | @since 0.3.4 instance Filtrable (RE s) where catMaybes = CatMaybes instance (char ~ Char, string ~ String) => IsString (RE char string) where fromString = string -- | @since 0.3.4 instance Semigroup a => Semigroup (RE s a) where x <> y = (<>) <$> x <*> y -- | @since 0.3.4 instance Monoid a => Monoid (RE s a) where mempty = pure mempty -- | Match and return the given sequence of symbols. -- -- Note that there is an 'IsString' instance for regular expression, so -- if you enable the @OverloadedStrings@ language extension, you can write -- @string \"foo\"@ simply as @\"foo\"@. -- -- Example: -- -- >{-# LANGUAGE OverloadedStrings #-} -- >import Text.Regex.Applicative -- > -- >number = "one" *> pure 1 <|> "two" *> pure 2 -- > -- >main = print $ "two" =~ number string :: Eq a => [a] -> RE a [a] string = traverse sym -- | Match and return a single symbol which satisfies the predicate psym :: (s -> Bool) -> RE s s psym p = msym (\s -> if p s then Just s else Nothing) -- | Like 'psym', but allows to return a computed value instead of the -- original symbol msym :: (s -> Maybe a) -> RE s a msym p = Symbol (error "Not numbered symbol") p -- | Match and return the given symbol sym :: Eq s => s -> RE s s sym s = psym (s ==)