-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Pure Haskell Tagged DFA Backend for "Text.Regex" (regex-base) -- -- This package provides a pure Haskell "Tagged" DFA regex engine for -- regex-base. This implementation was inspired by the algorithm -- (and Master's thesis) behind the regular expression library known as -- TRE or libtre. -- -- Please consult the Text.Regex.TDFA module for API documentation -- including a tutorial with usage examples; see also -- https://wiki.haskell.org/Regular_expressions for general -- information about regular expression support in Haskell. @package regex-tdfa @version 1.3.2 module Data.IntMap.CharMap2 newtype CharMap a CharMap :: IntMap a -> CharMap a [unCharMap] :: CharMap a -> IntMap a type Key = Char (!) :: CharMap a -> Key -> a (\\) :: CharMap a -> CharMap b -> CharMap a null :: CharMap a -> Bool size :: CharMap a -> Int member :: Key -> CharMap a -> Bool notMember :: Key -> CharMap a -> Bool lookup :: Key -> CharMap a -> Maybe a findWithDefault :: a -> Key -> CharMap a -> a empty :: CharMap a singleton :: Key -> a -> CharMap a insert :: Key -> a -> CharMap a -> CharMap a insertWith :: (a -> a -> a) -> Key -> a -> CharMap a -> CharMap a insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> CharMap a insertLookupWithKey :: (Key -> a -> a -> a) -> Key -> a -> CharMap a -> (Maybe a, CharMap a) delete :: Key -> CharMap a -> CharMap a adjust :: (a -> a) -> Key -> CharMap a -> CharMap a adjustWithKey :: (Key -> a -> a) -> Key -> CharMap a -> CharMap a update :: (a -> Maybe a) -> Key -> CharMap a -> CharMap a updateWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> CharMap a updateLookupWithKey :: (Key -> a -> Maybe a) -> Key -> CharMap a -> (Maybe a, CharMap a) union :: CharMap a -> CharMap a -> CharMap a unionWith :: (a -> a -> a) -> CharMap a -> CharMap a -> CharMap a unionWithKey :: (Key -> a -> a -> a) -> CharMap a -> CharMap a -> CharMap a unions :: [CharMap a] -> CharMap a unionsWith :: (a -> a -> a) -> [CharMap a] -> CharMap a difference :: CharMap a -> CharMap b -> CharMap a differenceWith :: (a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a differenceWithKey :: (Key -> a -> b -> Maybe a) -> CharMap a -> CharMap b -> CharMap a intersection :: CharMap a -> CharMap b -> CharMap a intersectionWith :: (a -> b -> a) -> CharMap a -> CharMap b -> CharMap a intersectionWithKey :: (Key -> a -> b -> a) -> CharMap a -> CharMap b -> CharMap a map :: (a -> b) -> CharMap a -> CharMap b mapWithKey :: (Key -> a -> b) -> CharMap a -> CharMap b mapAccum :: (a -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) mapAccumWithKey :: (a -> Key -> b -> (a, c)) -> a -> CharMap b -> (a, CharMap c) fold :: (a -> b -> b) -> b -> CharMap a -> b foldWithKey :: (Key -> a -> b -> b) -> b -> CharMap a -> b elems :: CharMap a -> [a] keys :: CharMap a -> [Key] keysSet :: CharMap a -> IntSet assocs :: CharMap a -> [(Key, a)] toList :: CharMap a -> [(Key, a)] fromList :: [(Key, a)] -> CharMap a fromListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a fromListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a toAscList :: CharMap a -> [(Key, a)] fromAscList :: [(Key, a)] -> CharMap a fromAscListWith :: (a -> a -> a) -> [(Key, a)] -> CharMap a fromAscListWithKey :: (Key -> a -> a -> a) -> [(Key, a)] -> CharMap a fromDistinctAscList :: [(Key, a)] -> CharMap a filter :: (a -> Bool) -> CharMap a -> CharMap a filterWithKey :: (Key -> a -> Bool) -> CharMap a -> CharMap a partition :: (a -> Bool) -> CharMap a 
-> (CharMap a, CharMap a) partitionWithKey :: (Key -> a -> Bool) -> CharMap a -> (CharMap a, CharMap a) mapMaybe :: (a -> Maybe b) -> CharMap a -> CharMap b mapMaybeWithKey :: (Key -> a -> Maybe b) -> CharMap a -> CharMap b mapEither :: (a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) mapEitherWithKey :: (Key -> a -> Either b c) -> CharMap a -> (CharMap b, CharMap c) split :: Key -> CharMap a -> (CharMap a, CharMap a) splitLookup :: Key -> CharMap a -> (CharMap a, Maybe a, CharMap a) isSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool isSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool isProperSubmapOf :: Eq a => CharMap a -> CharMap a -> Bool isProperSubmapOfBy :: (a -> b -> Bool) -> CharMap a -> CharMap b -> Bool showTree :: Show a => CharMap a -> String showTreeWith :: Show a => Bool -> Bool -> CharMap a -> String instance GHC.Show.Show a => GHC.Show.Show (Data.IntMap.CharMap2.CharMap a) instance GHC.Read.Read a => GHC.Read.Read (Data.IntMap.CharMap2.CharMap a) instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.IntMap.CharMap2.CharMap a) instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.IntMap.CharMap2.CharMap a) instance GHC.Base.Semigroup (Data.IntMap.CharMap2.CharMap a) instance GHC.Base.Monoid (Data.IntMap.CharMap2.CharMap a) instance GHC.Base.Functor Data.IntMap.CharMap2.CharMap module Data.IntSet.EnumSet2 newtype EnumSet e EnumSet :: IntSet -> EnumSet e [unEnumSet] :: EnumSet e -> IntSet (\\) :: Enum e => EnumSet e -> EnumSet e -> EnumSet e null :: Enum e => EnumSet e -> Bool size :: Enum e => EnumSet e -> Int member :: Enum e => e -> EnumSet e -> Bool notMember :: Enum e => Int -> EnumSet e -> Bool isSubsetOf :: Enum e => EnumSet e -> EnumSet e -> Bool isProperSubsetOf :: Enum e => EnumSet e -> EnumSet e -> Bool empty :: Enum e => EnumSet e singleton :: Enum e => e -> EnumSet e insert :: Enum e => e -> EnumSet e -> EnumSet e delete :: Enum e => e -> EnumSet e -> EnumSet e union :: Enum e => EnumSet e -> EnumSet e -> EnumSet e unions :: Enum e => [EnumSet e] -> EnumSet e difference :: Enum e => EnumSet e -> EnumSet e -> EnumSet e intersection :: Enum e => EnumSet e -> EnumSet e -> EnumSet e filter :: Enum e => (e -> Bool) -> EnumSet e -> EnumSet e partition :: Enum e => (e -> Bool) -> EnumSet e -> (EnumSet e, EnumSet e) split :: Enum e => e -> EnumSet e -> (EnumSet e, EnumSet e) splitMember :: Enum e => e -> EnumSet e -> (EnumSet e, Bool, EnumSet e) map :: Enum e => (e -> e) -> EnumSet e -> EnumSet e fold :: Enum e => (e -> b -> b) -> b -> EnumSet e -> b elems :: Enum e => EnumSet e -> [e] toList :: Enum e => EnumSet e -> [e] fromList :: Enum e => [e] -> EnumSet e toAscList :: Enum e => EnumSet e -> [e] fromAscList :: Enum e => [e] -> EnumSet e fromDistinctAscList :: Enum e => [e] -> EnumSet e showTree :: Enum e => EnumSet e -> String showTreeWith :: Enum e => Bool -> Bool -> EnumSet e -> String instance GHC.Show.Show (Data.IntSet.EnumSet2.EnumSet e) instance GHC.Read.Read (Data.IntSet.EnumSet2.EnumSet e) instance GHC.Classes.Ord (Data.IntSet.EnumSet2.EnumSet e) instance GHC.Classes.Eq (Data.IntSet.EnumSet2.EnumSet e) instance GHC.Base.Semigroup (Data.IntSet.EnumSet2.EnumSet e) instance GHC.Base.Monoid (Data.IntSet.EnumSet2.EnumSet e) module Data.IntMap.EnumMap2 newtype EnumMap k a EnumMap :: IntMap a -> EnumMap k a [unEnumMap] :: EnumMap k a -> IntMap a (!) 
:: Enum key => EnumMap key a -> key -> a (\\) :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a null :: Enum key => EnumMap key a -> Bool size :: Enum key => EnumMap key a -> Int member :: Enum key => key -> EnumMap key a -> Bool notMember :: Enum key => key -> EnumMap key a -> Bool lookup :: Enum key => key -> EnumMap key a -> Maybe a findWithDefault :: Enum key => a -> key -> EnumMap key a -> a empty :: Enum key => EnumMap key a singleton :: Enum key => key -> a -> EnumMap key a insert :: Enum key => key -> a -> EnumMap key a -> EnumMap key a insertWith :: Enum key => (a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a insertWithKey :: Enum key => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> EnumMap key a insertLookupWithKey :: Enum key => (key -> a -> a -> a) -> key -> a -> EnumMap key a -> (Maybe a, EnumMap key a) delete :: Enum key => key -> EnumMap key a -> EnumMap key a adjust :: Enum key => (a -> a) -> key -> EnumMap key a -> EnumMap key a adjustWithKey :: Enum key => (key -> a -> a) -> key -> EnumMap key a -> EnumMap key a update :: Enum key => (a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a updateWithKey :: Enum key => (key -> a -> Maybe a) -> key -> EnumMap key a -> EnumMap key a updateLookupWithKey :: Enum key => (key -> a -> Maybe a) -> key -> EnumMap key a -> (Maybe a, EnumMap key a) union :: Enum key => EnumMap key a -> EnumMap key a -> EnumMap key a unionWith :: Enum key => (a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a unionWithKey :: Enum key => (key -> a -> a -> a) -> EnumMap key a -> EnumMap key a -> EnumMap key a unions :: Enum key => [EnumMap key a] -> EnumMap key a unionsWith :: Enum key => (a -> a -> a) -> [EnumMap key a] -> EnumMap key a difference :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a differenceWith :: Enum key => (a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a differenceWithKey :: Enum key => (key -> a -> b -> Maybe a) -> EnumMap key a -> EnumMap key b -> EnumMap key a intersection :: Enum key => EnumMap key a -> EnumMap key b -> EnumMap key a intersectionWith :: Enum key => (a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a intersectionWithKey :: Enum key => (key -> a -> b -> a) -> EnumMap key a -> EnumMap key b -> EnumMap key a map :: Enum key => (a -> b) -> EnumMap key a -> EnumMap key b mapWithKey :: Enum key => (key -> a -> b) -> EnumMap key a -> EnumMap key b mapAccum :: Enum key => (a -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) mapAccumWithKey :: Enum key => (a -> key -> b -> (a, c)) -> a -> EnumMap key b -> (a, EnumMap key c) fold :: Enum key => (a -> b -> b) -> b -> EnumMap key a -> b foldWithKey :: Enum key => (key -> a -> b -> b) -> b -> EnumMap key a -> b elems :: Enum key => EnumMap key a -> [a] keys :: Enum key => EnumMap key a -> [key] keysSet :: Enum key => EnumMap key a -> EnumSet key assocs :: Enum key => EnumMap key a -> [(key, a)] toList :: Enum key => EnumMap key a -> [(key, a)] fromList :: Enum key => [(key, a)] -> EnumMap key a fromListWith :: Enum key => (a -> a -> a) -> [(key, a)] -> EnumMap key a fromListWithKey :: Enum key => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a toAscList :: Enum key => EnumMap key a -> [(key, a)] fromAscList :: Enum key => [(key, a)] -> EnumMap key a fromAscListWith :: Enum key => (a -> a -> a) -> [(key, a)] -> EnumMap key a fromAscListWithKey :: Enum key => (key -> a -> a -> a) -> [(key, a)] -> EnumMap key a fromDistinctAscList :: Enum key => [(key, a)] -> EnumMap key a 
filter :: Enum key => (a -> Bool) -> EnumMap key a -> EnumMap key a filterWithKey :: Enum key => (key -> a -> Bool) -> EnumMap key a -> EnumMap key a partition :: Enum key => (a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) partitionWithKey :: Enum key => (key -> a -> Bool) -> EnumMap key a -> (EnumMap key a, EnumMap key a) mapMaybe :: Enum key => (a -> Maybe b) -> EnumMap key a -> EnumMap key b mapMaybeWithKey :: Enum key => (key -> a -> Maybe b) -> EnumMap key a -> EnumMap key b mapEither :: Enum key => (a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) mapEitherWithKey :: Enum key => (key -> a -> Either b c) -> EnumMap key a -> (EnumMap key b, EnumMap key c) split :: Enum key => key -> EnumMap key a -> (EnumMap key a, EnumMap key a) splitLookup :: Enum key => key -> EnumMap key a -> (EnumMap key a, Maybe a, EnumMap key a) isSubmapOf :: (Enum key, Eq a) => EnumMap key a -> EnumMap key a -> Bool isSubmapOfBy :: Enum key => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool isProperSubmapOf :: (Enum key, Eq a) => EnumMap key a -> EnumMap key a -> Bool isProperSubmapOfBy :: Enum key => (a -> b -> Bool) -> EnumMap key a -> EnumMap key b -> Bool showTree :: (Enum key, Show a) => EnumMap key a -> String showTreeWith :: (Enum key, Show a) => Bool -> Bool -> EnumMap key a -> String instance GHC.Show.Show a => GHC.Show.Show (Data.IntMap.EnumMap2.EnumMap k a) instance GHC.Read.Read a => GHC.Read.Read (Data.IntMap.EnumMap2.EnumMap k a) instance GHC.Classes.Ord a => GHC.Classes.Ord (Data.IntMap.EnumMap2.EnumMap k a) instance GHC.Classes.Eq a => GHC.Classes.Eq (Data.IntMap.EnumMap2.EnumMap k a) instance GHC.Classes.Ord k => GHC.Base.Semigroup (Data.IntMap.EnumMap2.EnumMap k a) instance GHC.Classes.Ord k => GHC.Base.Monoid (Data.IntMap.EnumMap2.EnumMap k a) instance GHC.Classes.Ord k => GHC.Base.Functor (Data.IntMap.EnumMap2.EnumMap k) instance GHC.Classes.Ord k => Data.Foldable.Foldable (Data.IntMap.EnumMap2.EnumMap k) -- | This creates a lazy Trie based on a finite range of Ints and is used -- to memorize a function over the subsets of this range. -- -- To create a Trie you need two supply 2 things * Range of keys to bound -- * A function or functions used to construct the value for a subset of -- keys -- -- The Trie uses the Array type internally. module Text.Regex.TDFA.IntArrTrieSet data TrieSet v TrieSet :: v -> Array Int (TrieSet v) -> TrieSet v [value] :: TrieSet v -> v [next] :: TrieSet v -> Array Int (TrieSet v) -- | This is the accessor for the Trie. The list of keys should be sorted. lookupAsc :: TrieSet v -> [Int] -> v -- | This is a Trie constructor for a complete range of keys. fromBounds :: (Int, Int) -> ([Int] -> v) -> TrieSet v -- | This is a Trie constructor for a complete range of keys that uses a -- function from single values and a merge operation on values to fill -- the Trie. fromSinglesMerge :: v -> (v -> v -> v) -> (Int, Int) -> (Int -> v) -> TrieSet v -- | This is a Trie constructor for a complete range of keys that uses a -- function from single values and a sum operation of values to fill the -- Trie. fromSinglesSum :: ([v] -> v) -> (Int, Int) -> (Int -> v) -> TrieSet v -- | Common provides simple functions to the backend. It defines most of -- the data types. All modules should call error via the -- common_error function below. 
module Text.Regex.TDFA.Common look :: Int -> IntMap a -> a common_error :: String -> String -> a on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2 -- | After sort or sortBy the use of nub or -- nubBy can be replaced by norep or norepBy. norep :: Eq a => [a] -> [a] -- | After sort or sortBy the use of nub or -- nubBy can be replaced by norep or norepBy. norepBy :: (a -> a -> Bool) -> [a] -> [a] mapFst :: Functor f => (t -> t2) -> f (t, t1) -> f (t2, t1) mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2) fst3 :: (a, b, c) -> a snd3 :: (a, b, c) -> b thd3 :: (a, b, c) -> c flipOrder :: Ordering -> Ordering noWin :: WinTags -> Bool -- | Used to track elements of the pattern that accept characters or are -- anchors. newtype DoPa DoPa :: Int -> DoPa [dopaIndex] :: DoPa -> Int -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. data CompOption CompOption :: Bool -> Bool -> Bool -> Bool -> Bool -> CompOption -- | True in blankCompOpt and defaultCompOpt. [caseSensitive] :: CompOption -> Bool -- | False in blankCompOpt, True in defaultCompOpt. Compile -- for newline-sensitive matching. -- -- From regexp man page: "By default, newline is a completely -- ordinary character with no special meaning in either REs or strings. -- With this flag, inverted bracket expressions [^ and -- . never match newline, a ^ anchor matches the null -- string after any newline in the string in addition to its normal -- function, and the $ anchor matches the null string before any -- newline in the string in addition to its normal function." [multiline] :: CompOption -> Bool -- | True (and therefore right associative) in blankCompOpt and -- defaultCompOpt. [rightAssoc] :: CompOption -> Bool -- | False in blankCompOpt, True in defaultCompOpt. Enables -- the extended non-POSIX syntax described in Text.Regex.TDFA -- haddock documentation. [newSyntax] :: CompOption -> Bool -- | False by default. This is POSIX correct but it takes space and is -- slower. Setting this to True will improve performance, and should be -- done if you plan to set the captureGroups ExecOption to -- False. [lastStarGreedy] :: CompOption -> Bool data ExecOption ExecOption :: Bool -> ExecOption -- | True by default. Set to False to improve speed (and space). [captureGroups] :: ExecOption -> Bool -- | Used by implementation to name certain Postions during -- matching. Identity of Position tag to set during a transition. type Tag = Int -- | Internal use to indicate type of tag and preference for larger or -- smaller Positions. data OP Maximize :: OP Minimize :: OP Orbit :: OP Ignore :: OP -- | Internal NFA node identity number. type Index = Int -- | Internal DFA identity is this Set of NFA Index. type SetIndex = IntSet -- | Index into the text being searched. type Position = Int -- | GroupIndex is for indexing submatches from capturing -- parenthesized groups (PGroup or Group). type GroupIndex = Int -- | GroupInfo collects the parent and tag information for an -- instance of a group. data GroupInfo GroupInfo :: GroupIndex -> Tag -> GroupInfo [thisIndex, parentIndex] :: GroupInfo -> GroupIndex [startTag, stopTag, flagTag] :: GroupInfo -> Tag -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. 
data Regex Regex :: DFA -> Index -> (Index, Index) -> (Tag, Tag) -> TrieSet DFA -> Array Tag OP -> Array GroupIndex [GroupInfo] -> Bool -> CompOption -> ExecOption -> Regex -- | starting DFA state [regex_dfa] :: Regex -> DFA -- | index of starting state [regex_init] :: Regex -> Index -- | indexes of smallest and largest states [regex_b_index] :: Regex -> (Index, Index) -- | indexes of smallest and largest tags [regex_b_tags] :: Regex -> (Tag, Tag) -- | All DFA states [regex_trie] :: Regex -> TrieSet DFA -- | information about each tag [regex_tags] :: Regex -> Array Tag OP -- | information about each group [regex_groups] :: Regex -> Array GroupIndex [GroupInfo] -- | used for optimizing execution [regex_isFrontAnchored] :: Regex -> Bool [regex_compOptions] :: Regex -> CompOption [regex_execOptions] :: Regex -> ExecOption data WinEmpty WinEmpty :: Instructions -> WinEmpty WinTest :: WhichTest -> Maybe WinEmpty -> Maybe WinEmpty -> WinEmpty -- | Internal NFA node type. data QNFA QNFA :: Index -> QT -> QNFA [q_id] :: QNFA -> Index [q_qt] :: QNFA -> QT -- | Internal to QNFA type. data QT Simple :: WinTags -> CharMap QTrans -> QTrans -> QT -- | empty transitions to the virtual winning state [qt_win] :: QT -> WinTags -- | all ways to leave this QNFA to other or the same QNFA [qt_trans] :: QT -> CharMap QTrans -- | default ways to leave this QNFA to other or the same QNFA [qt_other] :: QT -> QTrans Testing :: WhichTest -> EnumSet DoPa -> QT -> QT -- | The test to perform [qt_test] :: QT -> WhichTest -- | location(s) of the anchor(s) in the original regexp [qt_dopas] :: QT -> EnumSet DoPa -- | use qt_a if test is True, else use qt_b -- | use qt_a if test is True, else use qt_b [qt_a, qt_b] :: QT -> QT -- | Internal type to represent the tagged transition from one QNFA to -- another (or itself). The key is the Index of the destination QNFA. type QTrans = IntMap [TagCommand] -- | Known predicates, just Beginning of Line (^) and End of Line ($). Also -- support for GNU extensions is being added: \` beginning of buffer, \' -- end of buffer, \< and \> for begin and end of words, \b and \B -- for word boundary and not word boundary. data WhichTest -- | ^ (affected by multiline option) Test_BOL :: WhichTest -- | $ (affected by multiline option) Test_EOL :: WhichTest -- | \` beginning of buffer Test_BOB :: WhichTest -- | \' end ofbuffer Test_EOB :: WhichTest -- | \< beginning of word Test_BOW :: WhichTest -- | \> end of word Test_EOW :: WhichTest -- | \b word boundary Test_EdgeWord :: WhichTest -- | \B not word boundary Test_NotEdgeWord :: WhichTest -- | The things that can be done with a Tag. TagTask and -- ResetGroupStopTask are for tags with Maximize or Minimize OP -- values. ResetOrbitTask and EnterOrbitTask and -- LeaveOrbitTask are for tags with Orbit OP value. data TagTask TagTask :: TagTask ResetGroupStopTask :: TagTask SetGroupStopTask :: TagTask ResetOrbitTask :: TagTask EnterOrbitTask :: TagTask LeaveOrbitTask :: TagTask -- | Ordered list of tags and their associated Task. type TagTasks = [(Tag, TagTask)] -- | When attached to a QTrans the TagTask can be done before or after -- accepting the character. data TagUpdate PreUpdate :: TagTask -> TagUpdate PostUpdate :: TagTask -> TagUpdate -- | Ordered list of tags and their associated update operation. type TagList = [(Tag, TagUpdate)] -- | A TagList and the location of the item in the original pattern that is -- being accepted. 
type TagCommand = (DoPa, TagList) -- | Ordered list of tags and their associated update operation to perform -- on an empty transition to the virtual winning state. type WinTags = TagList -- | Internal DFA node, identified by the Set of indices of the QNFA nodes -- it represents. data DFA DFA :: SetIndex -> DT -> DFA [d_id] :: DFA -> SetIndex [d_dt] :: DFA -> DT data Transition Transition :: DFA -> DFA -> DTrans -> Transition -- | where to go (maximal), including respawning [trans_many] :: Transition -> DFA -- | where to go, not including respawning [trans_single] :: Transition -> DFA -- | how to go, including respawning [trans_how] :: Transition -> DTrans -- | Internal to the DFA node data DT Simple' :: IntMap Instructions -> CharMap Transition -> Transition -> DT -- | Actions to perform to win [dt_win] :: DT -> IntMap Instructions -- | Transition to accept Char [dt_trans] :: DT -> CharMap Transition -- | default accepting transition [dt_other] :: DT -> Transition Testing' :: WhichTest -> EnumSet DoPa -> DT -> DT -- | The test to perform [dt_test] :: DT -> WhichTest -- | location(s) of the anchor(s) in the original regexp [dt_dopas] :: DT -> EnumSet DoPa -- | use dt_a if test is True else use dt_b -- | use dt_a if test is True else use dt_b [dt_a, dt_b] :: DT -> DT -- | Internal type to represent the commands for the tagged transition. The -- outer IntMap is for the destination Index and the inner -- IntMap is for the Source Index. This is convenient since all -- runtime data going to the same destination must be compared to find -- the best. -- -- A Destination IntMap entry may have an empty Source -- IntMap if and only if the destination is the starting index and -- the NFA or DFA. This instructs the matching engine to spawn a new -- entry starting at the post-update position. type DTrans = IntMap (IntMap (DoPa, Instructions)) -- | Internal convenience type for the text display code. type DTrans' = [(Index, [(Index, (DoPa, ([(Tag, (Position, Bool))], [String])))])] -- | Positions for which a * was re-started while looping. Need to -- append locations at back but compare starting with front, so use -- Seq as a queue. The initial position is saved in basePos -- (and a Maximize Tag), the middle positions in the Seq, and the -- final position is NOT saved in the Orbits (only in a Maximize Tag). 
data Orbits Orbits :: !Bool -> Position -> Maybe Int -> !Seq Position -> Orbits [inOrbit] :: Orbits -> !Bool [basePos] :: Orbits -> Position [ordinal] :: Orbits -> Maybe Int [getOrbits] :: Orbits -> !Seq Position -- | The newPos and newFlags lists in Instructions are -- sorted by, and unique in, the Tag values data Instructions Instructions :: ![(Tag, Action)] -> !Maybe (Position -> OrbitTransformer) -> Instructions [newPos] :: Instructions -> ![(Tag, Action)] [newOrbits] :: Instructions -> !Maybe (Position -> OrbitTransformer) data Action SetPre :: Action SetPost :: Action SetVal :: Int -> Action type OrbitTransformer = OrbitLog -> OrbitLog type OrbitLog = IntMap Orbits showQT :: QT -> String indent :: [String] -> String showDT :: DT -> String seeDTrans :: DTrans -> String instance GHC.Classes.Ord Text.Regex.TDFA.Common.DoPa instance GHC.Classes.Eq Text.Regex.TDFA.Common.DoPa instance GHC.Show.Show Text.Regex.TDFA.Common.CompOption instance GHC.Read.Read Text.Regex.TDFA.Common.CompOption instance GHC.Show.Show Text.Regex.TDFA.Common.ExecOption instance GHC.Read.Read Text.Regex.TDFA.Common.ExecOption instance GHC.Show.Show Text.Regex.TDFA.Common.OP instance GHC.Classes.Eq Text.Regex.TDFA.Common.OP instance GHC.Show.Show Text.Regex.TDFA.Common.GroupInfo instance GHC.Enum.Enum Text.Regex.TDFA.Common.WhichTest instance GHC.Classes.Ord Text.Regex.TDFA.Common.WhichTest instance GHC.Classes.Eq Text.Regex.TDFA.Common.WhichTest instance GHC.Show.Show Text.Regex.TDFA.Common.WhichTest instance GHC.Classes.Eq Text.Regex.TDFA.Common.TagTask instance GHC.Show.Show Text.Regex.TDFA.Common.TagTask instance GHC.Classes.Eq Text.Regex.TDFA.Common.TagUpdate instance GHC.Show.Show Text.Regex.TDFA.Common.TagUpdate instance GHC.Show.Show Text.Regex.TDFA.Common.Orbits instance GHC.Classes.Eq Text.Regex.TDFA.Common.Action instance GHC.Show.Show Text.Regex.TDFA.Common.Action instance GHC.Show.Show Text.Regex.TDFA.Common.DFA instance GHC.Show.Show Text.Regex.TDFA.Common.WinEmpty instance Text.Regex.Base.RegexLike.RegexOptions Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption instance GHC.Show.Show Text.Regex.TDFA.Common.DT instance GHC.Show.Show Text.Regex.TDFA.Common.Instructions instance GHC.Show.Show Text.Regex.TDFA.Common.QNFA instance GHC.Show.Show Text.Regex.TDFA.Common.QT instance GHC.Classes.Eq Text.Regex.TDFA.Common.QT instance GHC.Enum.Enum Text.Regex.TDFA.Common.DoPa instance GHC.Show.Show Text.Regex.TDFA.Common.DoPa module Text.Regex.TDFA.NewDFA.Uncons class Uncons a uncons :: Uncons a => a -> Maybe (Char, a) instance Text.Regex.TDFA.NewDFA.Uncons.Uncons [GHC.Types.Char] instance Text.Regex.TDFA.NewDFA.Uncons.Uncons (Data.Sequence.Internal.Seq GHC.Types.Char) instance Text.Regex.TDFA.NewDFA.Uncons.Uncons Data.ByteString.Internal.ByteString instance Text.Regex.TDFA.NewDFA.Uncons.Uncons Data.ByteString.Lazy.Internal.ByteString instance Text.Regex.TDFA.NewDFA.Uncons.Uncons Data.Text.Internal.Text instance Text.Regex.TDFA.NewDFA.Uncons.Uncons Data.Text.Internal.Lazy.Text module Text.Regex.TDFA.NewDFA.MakeTest test_singleline :: Uncons text => WhichTest -> Index -> Char -> text -> Bool test_multiline :: Uncons text => WhichTest -> Index -> Char -> text -> Bool -- | Like Engine, but merely checks to see whether any match at all is -- found. 
module Text.Regex.TDFA.NewDFA.Tester matchTest :: Uncons text => Regex -> text -> Bool -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String module Text.Regex.TDFA.NewDFA.Engine_NC_FA execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] -- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String module Text.Regex.TDFA.NewDFA.Engine_NC execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] instance GHC.Show.Show Text.Regex.TDFA.NewDFA.Engine_NC.WScratch -- | This is the code for the main engine. This captures the posix -- subexpressions. There is also a non-capturing engine, and a testing -- engine. -- -- It is polymorphic over the internal Uncons type class, and specialized -- to produce the needed variants. module Text.Regex.TDFA.NewDFA.Engine_FA execMatch :: forall text. Uncons text => Regex -> Position -> Char -> text -> [MatchArray] -- | This is the code for the main engine. This captures the posix -- subexpressions. This execMatch also dispatches to -- Engine_NC, Engine_FA, and Engine_FC_NA -- -- It is polymorphic over the internal Uncons type class, and specialized -- to produce the needed variants. module Text.Regex.TDFA.NewDFA.Engine execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] -- | This Text.Regex.TDFA.Pattern module provides the Pattern -- data type and its subtypes. This Pattern type is used to -- represent the parsed form of a regular expression. module Text.Regex.TDFA.Pattern -- | Pattern is the type returned by the regular expression parser -- parseRegex. This is consumed by the -- Text.Regex.TDFA.CorePattern module and the tender leaves are -- nibbled by the Text.Regex.TDFA.TNFA module. -- -- The DoPa field is the index of the component in the regex -- string r. data Pattern -- | (), matches the empty string. PEmpty :: Pattern -- | Group (r). Nothing indicates non-matching -- PGroup (never produced by parser parseRegex). PGroup :: Maybe GroupIndex -> Pattern -> Pattern -- | Alternative r|s (flattened by starTrans). POr :: [Pattern] -> Pattern -- | Sequence rs (flattened by starTrans). PConcat :: [Pattern] -> Pattern -- | Zero or one repetitions r? (eliminated by starTrans). PQuest :: Pattern -> Pattern -- | One or more repetitions r+ (eliminated by starTrans). PPlus :: Pattern -> Pattern -- | Zero or more repetitions r*. True (default) means -- may accept the empty string on its first iteration. PStar :: Bool -> Pattern -> Pattern -- | Given number or repetitions r{n} or r{n,m} -- (eliminated by starTrans). PBound :: Int -> Maybe Int -> Pattern -> Pattern -- | ^ matches beginning of input. PCarat :: DoPa -> Pattern [getDoPa] :: Pattern -> DoPa -- | $ matches end of input. PDollar :: DoPa -> Pattern [getDoPa] :: Pattern -> DoPa -- | . matches any character. PDot :: DoPa -> Pattern [getDoPa] :: Pattern -> DoPa -- | Bracket expression [...]. PAny :: DoPa -> PatternSet -> Pattern [getDoPa] :: Pattern -> DoPa [getPatternSet] :: Pattern -> PatternSet -- | Inverted bracket expression [^...]. PAnyNot :: DoPa -> PatternSet -> Pattern [getDoPa] :: Pattern -> DoPa [getPatternSet] :: Pattern -> PatternSet -- | Backslashed character c, may have special meaning. PEscape :: DoPa -> Char -> Pattern [getDoPa] :: Pattern -> DoPa [getPatternChar] :: Pattern -> Char -- | Single character, matches given character. PChar :: DoPa -> Char -> Pattern [getDoPa] :: Pattern -> DoPa [getPatternChar] :: Pattern -> Char -- | Tag for internal use, introduced by starTrans. 
PNonCapture :: Pattern -> Pattern -- | Tag for internal use, introduced by starTrans. PNonEmpty :: Pattern -> Pattern -- | Content of a bracket expression [...] organized into -- characters, POSIX character classes (e.g. [[:alnum:]]), -- collating elements (e.g. [.ch.], unused), and equivalence -- classes (e.g. [=a=], treated as characters). data PatternSet PatternSet :: Maybe (Set Char) -> Maybe (Set PatternSetCharacterClass) -> Maybe (Set PatternSetCollatingElement) -> Maybe (Set PatternSetEquivalenceClass) -> PatternSet -- | Content of [: :], e.g. "alnum" for -- [:alnum:]. newtype PatternSetCharacterClass PatternSetCharacterClass :: String -> PatternSetCharacterClass [unSCC] :: PatternSetCharacterClass -> String -- | Content of [. .], e.g. "ch" for [.ch.]. newtype PatternSetCollatingElement PatternSetCollatingElement :: String -> PatternSetCollatingElement [unSCE] :: PatternSetCollatingElement -> String -- | Content of [= =], e.g. "a" for [=a=]. newtype PatternSetEquivalenceClass PatternSetEquivalenceClass :: String -> PatternSetEquivalenceClass [unSEC] :: PatternSetEquivalenceClass -> String -- | GroupIndex is for indexing submatches from capturing -- parenthesized groups (PGroup or Group). type GroupIndex = Int -- | Used to track elements of the pattern that accept characters or are -- anchors. newtype DoPa DoPa :: Int -> DoPa [dopaIndex] :: DoPa -> Int -- | This returns the strictly ascending list of characters represented by -- [: :] POSIX character classes. Unrecognized class names -- return an empty string. decodeCharacterClass :: PatternSetCharacterClass -> String -- | decodePatternSet cannot handle collating element and treats -- equivalence classes as just their definition and nothing more. decodePatternSet :: PatternSet -> Set Char showPattern :: Pattern -> String -- | Do the transformation and simplification in a single traversal. This -- removes the PPlus, PQuest, and PBound values, -- changing to POr and PEmpty and PStar. For some -- PBound values it adds PNonEmpty and PNonCapture -- semantic marker. It also simplifies to flatten out nested POr -- and PConcat instances and eliminate some unneeded PEmpty -- values. starTrans :: Pattern -> Pattern starTrans' :: Pattern -> Pattern -- | Function to transform a pattern into an equivalent, but less redundant -- form. Nested POr and PConcat are flattened. -- PEmpty is propagated. simplify' :: Pattern -> Pattern -- | Apply a Pattern transformation function depth first. 
dfsPattern :: (Pattern -> Pattern) -> Pattern -> Pattern instance GHC.Classes.Ord Text.Regex.TDFA.Pattern.PatternSetCharacterClass instance GHC.Classes.Eq Text.Regex.TDFA.Pattern.PatternSetCharacterClass instance GHC.Classes.Ord Text.Regex.TDFA.Pattern.PatternSetCollatingElement instance GHC.Classes.Eq Text.Regex.TDFA.Pattern.PatternSetCollatingElement instance GHC.Classes.Ord Text.Regex.TDFA.Pattern.PatternSetEquivalenceClass instance GHC.Classes.Eq Text.Regex.TDFA.Pattern.PatternSetEquivalenceClass instance GHC.Classes.Eq Text.Regex.TDFA.Pattern.PatternSet instance GHC.Show.Show Text.Regex.TDFA.Pattern.Pattern instance GHC.Classes.Eq Text.Regex.TDFA.Pattern.Pattern instance GHC.Show.Show Text.Regex.TDFA.Pattern.PatternSet instance GHC.Show.Show Text.Regex.TDFA.Pattern.PatternSetEquivalenceClass instance GHC.Show.Show Text.Regex.TDFA.Pattern.PatternSetCollatingElement instance GHC.Show.Show Text.Regex.TDFA.Pattern.PatternSetCharacterClass -- | The CorePattern module deconstructs the Pattern tree created by -- ReadRegex.parseRegex and returns a simpler Q/P tree with annotations -- at each Q node. This will be converted by the TNFA module into a QNFA -- finite automata. -- -- Of particular note, this Pattern to Q/P conversion creates and assigns -- all the internal Tags that will be used during the matching process, -- and associates the captures groups with the tags that represent their -- starting and ending locations and with their immediate parent group. -- -- Each Maximize and Minimize tag is held as either a preTag or a postTag -- by one and only one location in the Q/P tree. The Orbit tags are each -- held by one and only one Star node. Tags that stop a Group are also -- held in perhaps numerous preReset lists. -- -- The additional nullQ::nullView field of Q records the potentially -- complex information about what tests and tags must be used if the -- pattern unQ::P matches 0 zero characters. There can be redundancy in -- nullView, which is eliminated by cleanNullView. -- -- Uses recursive do notation. -- -- 2009 XXX TODO: we can avoid needing tags in the part of the pattern -- after the last capturing group (when right-associative). This is -- flipped for left-associative where the front of the pattern before the -- first capturing group needs no tags. The edge of these regions is -- subtle: both case needs a Maximize tag. One ought to be able to check -- the Pattern: if the root is PConcat then a scan from the end (start) -- looking for the first with an embedded PGroup can be found and the -- PGroup free elements can be wrapped in some new PNOTAG semantic -- indicator. module Text.Regex.TDFA.CorePattern data Q Q :: NullView -> (Position, Maybe Position) -> [Tag] -> [Tag] -> Maybe Tag -> Bool -> Bool -> Wanted -> P -> Q [nullQ] :: Q -> NullView [takes] :: Q -> (Position, Maybe Position) [preReset] :: Q -> [Tag] [postSet] :: Q -> [Tag] [preTag, postTag] :: Q -> Maybe Tag [tagged] :: Q -> Bool [childGroups] :: Q -> Bool [wants] :: Q -> Wanted [unQ] :: Q -> P data P Empty :: P Or :: [Q] -> P Seq :: Q -> Q -> P Star :: Maybe Tag -> [Tag] -> Bool -> Q -> P [getOrbit] :: P -> Maybe Tag [resetOrbits] :: P -> [Tag] [firstNull] :: P -> Bool [unStar] :: P -> Q Test :: TestInfo -> P OneChar :: Pattern -> P NonEmpty :: Q -> P -- | Known predicates, just Beginning of Line (^) and End of Line ($). Also -- support for GNU extensions is being added: \` beginning of buffer, \' -- end of buffer, \< and \> for begin and end of words, \b and \B -- for word boundary and not word boundary. 
data WhichTest -- | ^ (affected by multiline option) Test_BOL :: WhichTest -- | $ (affected by multiline option) Test_EOL :: WhichTest -- | \` beginning of buffer Test_BOB :: WhichTest -- | \' end ofbuffer Test_EOB :: WhichTest -- | \< beginning of word Test_BOW :: WhichTest -- | \> end of word Test_EOW :: WhichTest -- | \b word boundary Test_EdgeWord :: WhichTest -- | \B not word boundary Test_NotEdgeWord :: WhichTest data Wanted WantsQNFA :: Wanted WantsQT :: Wanted WantsBoth :: Wanted WantsEither :: Wanted type TestInfo = (WhichTest, DoPa) -- | Internal use to indicate type of tag and preference for larger or -- smaller Positions. data OP Maximize :: OP Minimize :: OP Orbit :: OP Ignore :: OP newtype SetTestInfo SetTestInfo :: EnumMap WhichTest (EnumSet DoPa) -> SetTestInfo [getTests] :: SetTestInfo -> EnumMap WhichTest (EnumSet DoPa) type NullView = [(SetTestInfo, TagList)] patternToQ :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> (Q, Array Tag OP, Array GroupIndex [GroupInfo]) cleanNullView :: NullView -> NullView cannotAccept :: Q -> Bool mustAccept :: Q -> Bool instance GHC.Classes.Eq Text.Regex.TDFA.CorePattern.SetTestInfo instance GHC.Show.Show Text.Regex.TDFA.CorePattern.HandleTag instance GHC.Show.Show Text.Regex.TDFA.CorePattern.Wanted instance GHC.Classes.Eq Text.Regex.TDFA.CorePattern.Wanted instance GHC.Classes.Eq Text.Regex.TDFA.CorePattern.P instance GHC.Show.Show Text.Regex.TDFA.CorePattern.P instance GHC.Classes.Eq Text.Regex.TDFA.CorePattern.Q instance GHC.Show.Show Text.Regex.TDFA.CorePattern.Q instance GHC.Base.Semigroup Text.Regex.TDFA.CorePattern.SetTestInfo instance GHC.Base.Monoid Text.Regex.TDFA.CorePattern.SetTestInfo instance GHC.Show.Show Text.Regex.TDFA.CorePattern.SetTestInfo -- | This is a POSIX version of parseRegex that allows NUL characters. -- Lazy/Possessive/Backrefs are not recognized. Anchors ^ and $ are -- recognized. -- -- A PGroup returned always has (Maybe GroupIndex) -- set to (Just _) and never to Nothing. module Text.Regex.TDFA.ReadRegex -- | Return either an error message or a tuple of the Pattern and the -- largest group index and the largest DoPa index (both have smallest -- index of 1). Since the regular expression is supplied as [Char] it -- automatically supports unicode and \NUL characters. parseRegex :: String -> Either ParseError (Pattern, (GroupIndex, DoPa)) -- | Text.Regex.TDFA.TNFA converts the CorePattern Q/P data (and its -- Pattern leafs) to a QNFA tagged non-deterministic finite automata. -- -- This holds every possible way to follow one state by another, while in -- the DFA these will be reduced by picking a single best transition for -- each (source,destination) pair. The transitions are heavily and often -- redundantly annotated with tasks to perform, and this redundancy is -- reduced when picking the best transition. So far, keeping all this -- information has helped fix bugs in both the design and implementation. -- -- The QNFA for a Pattern with a starTraned Q/P form with N one character -- accepting leaves has at most N+1 nodes. These nodes represent the -- future choices after accepting a leaf. The processing of Or nodes -- often reduces this number by sharing at the end of the different -- paths. Turning off capturing while compiling the pattern may (future -- extension) reduce this further for some patterns by processing Star -- with optimizations. This compact design also means that tags are -- assigned not just to be updated before taking a transition (PreUpdate) -- but also after the transition (PostUpdate). 
-- -- Uses recursive do notation. module Text.Regex.TDFA.TNFA patternToNFA :: CompOption -> (Pattern, (GroupIndex, DoPa)) -> ((Index, Array Index QNFA), Array Tag OP, Array GroupIndex [GroupInfo]) -- | Internal NFA node type. data QNFA QNFA :: Index -> QT -> QNFA [q_id] :: QNFA -> Index [q_qt] :: QNFA -> QT -- | Internal to QNFA type. data QT Simple :: WinTags -> CharMap QTrans -> QTrans -> QT -- | empty transitions to the virtual winning state [qt_win] :: QT -> WinTags -- | all ways to leave this QNFA to other or the same QNFA [qt_trans] :: QT -> CharMap QTrans -- | default ways to leave this QNFA to other or the same QNFA [qt_other] :: QT -> QTrans Testing :: WhichTest -> EnumSet DoPa -> QT -> QT -- | The test to perform [qt_test] :: QT -> WhichTest -- | location(s) of the anchor(s) in the original regexp [qt_dopas] :: QT -> EnumSet DoPa -- | use qt_a if test is True, else use qt_b -- | use qt_a if test is True, else use qt_b [qt_a, qt_b] :: QT -> QT -- | Internal type to represent the tagged transition from one QNFA to -- another (or itself). The key is the Index of the destination QNFA. type QTrans = IntMap [TagCommand] -- | When attached to a QTrans the TagTask can be done before or after -- accepting the character. data TagUpdate PreUpdate :: TagTask -> TagUpdate PostUpdate :: TagTask -> TagUpdate -- | Text.Regex.TDFA.TDFA converts the QNFA from TNFA into the DFA. -- A DFA state corresponds to a Set of QNFA states, represented as list -- of Index which are used to lookup the DFA state in a lazy Trie which -- holds all possible subsets of QNFA states. module Text.Regex.TDFA.TDFA patternToRegex :: (Pattern, (GroupIndex, DoPa)) -> CompOption -> ExecOption -> Regex -- | Internal DFA node, identified by the Set of indices of the QNFA nodes -- it represents. data DFA DFA :: SetIndex -> DT -> DFA [d_id] :: DFA -> SetIndex [d_dt] :: DFA -> DT -- | Internal to the DFA node data DT Simple' :: IntMap Instructions -> CharMap Transition -> Transition -> DT -- | Actions to perform to win [dt_win] :: DT -> IntMap Instructions -- | Transition to accept Char [dt_trans] :: DT -> CharMap Transition -- | default accepting transition [dt_other] :: DT -> Transition Testing' :: WhichTest -> EnumSet DoPa -> DT -> DT -- | The test to perform [dt_test] :: DT -> WhichTest -- | location(s) of the anchor(s) in the original regexp [dt_dopas] :: DT -> EnumSet DoPa -- | use dt_a if test is True else use dt_b -- | use dt_a if test is True else use dt_b [dt_a, dt_b] :: DT -> DT examineDFA :: Regex -> String nfaToDFA :: ((Index, Array Index QNFA), Array Tag OP, Array GroupIndex [GroupInfo]) -> CompOption -> ExecOption -> Regex dfaMap :: DFA -> Map SetIndex DFA instance GHC.Show.Show Text.Regex.TDFA.TDFA.AlterOrbit -- | This modules provides RegexMaker and RegexLike instances -- for using String with the TDFA backend. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.String -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex -- | 0 based index from start of source, or (-1) for unused type MatchOffset = Int -- | non-negative length of a match type MatchLength = Int -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. 
data CompOption data ExecOption compile :: CompOption -> ExecOption -> String -> Either String Regex execute :: Regex -> String -> Either String (Maybe MatchArray) regexec :: Regex -> String -> Either String (Maybe (String, String, String, [String])) instance Text.Regex.Base.RegexLike.RegexMaker Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption GHC.Base.String instance Text.Regex.Base.RegexLike.RegexLike Text.Regex.TDFA.Common.Regex GHC.Base.String instance Text.Regex.Base.RegexLike.RegexContext Text.Regex.TDFA.Common.Regex GHC.Base.String GHC.Base.String -- | This modules provides RegexMaker and RegexLike instances -- for using ByteString with the DFA backend -- (Text.Regex.Lib.WrapDFAEngine and -- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via -- import Text.Regex.TDFA. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.Sequence -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> Seq Char -> Either String Regex execute :: Regex -> Seq Char -> Either String (Maybe MatchArray) regexec :: Regex -> Seq Char -> Either String (Maybe (Seq Char, Seq Char, Seq Char, [Seq Char])) instance Text.Regex.Base.RegexLike.RegexContext Text.Regex.TDFA.Common.Regex (Data.Sequence.Internal.Seq GHC.Types.Char) (Data.Sequence.Internal.Seq GHC.Types.Char) instance Text.Regex.Base.RegexLike.RegexMaker Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption (Data.Sequence.Internal.Seq GHC.Types.Char) instance Text.Regex.Base.RegexLike.RegexLike Text.Regex.TDFA.Common.Regex (Data.Sequence.Internal.Seq GHC.Types.Char) -- | This modules provides RegexMaker and RegexLike instances -- for using ByteString with the DFA backend -- (Text.Regex.Lib.WrapDFAEngine and -- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via -- import Text.Regex.TDFA. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.ByteString.Lazy -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. 
data CompOption data ExecOption compile :: CompOption -> ExecOption -> ByteString -> Either String Regex execute :: Regex -> ByteString -> Either String (Maybe MatchArray) regexec :: Regex -> ByteString -> Either String (Maybe (ByteString, ByteString, ByteString, [ByteString])) instance Text.Regex.Base.RegexLike.RegexContext Text.Regex.TDFA.Common.Regex Data.ByteString.Lazy.Internal.ByteString Data.ByteString.Lazy.Internal.ByteString instance Text.Regex.Base.RegexLike.RegexMaker Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption Data.ByteString.Lazy.Internal.ByteString instance Text.Regex.Base.RegexLike.RegexLike Text.Regex.TDFA.Common.Regex Data.ByteString.Lazy.Internal.ByteString -- | This modules provides RegexMaker and RegexLike instances -- for using ByteString with the DFA backend -- (Text.Regex.Lib.WrapDFAEngine and -- Text.Regex.Lazy.DFAEngineFPS). This module is usually used via -- import Text.Regex.TDFA. -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.ByteString -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> ByteString -> Either String Regex execute :: Regex -> ByteString -> Either String (Maybe MatchArray) regexec :: Regex -> ByteString -> Either String (Maybe (ByteString, ByteString, ByteString, [ByteString])) instance Text.Regex.Base.RegexLike.RegexContext Text.Regex.TDFA.Common.Regex Data.ByteString.Internal.ByteString Data.ByteString.Internal.ByteString instance Text.Regex.Base.RegexLike.RegexMaker Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption Data.ByteString.Internal.ByteString instance Text.Regex.Base.RegexLike.RegexLike Text.Regex.TDFA.Common.Regex Data.ByteString.Internal.ByteString -- | This modules provides RegexMaker and RegexLike instances -- for using Text with the TDFA backend -- (Text.Regex.TDFA.NewDFA.Engine and -- Text.Regex.TDFA.NewDFA.Tester). -- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.Text -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> Text -> Either String Regex execute :: Regex -> Text -> Either String (Maybe MatchArray) regexec :: Regex -> Text -> Either String (Maybe (Text, Text, Text, [Text])) instance Text.Regex.Base.RegexLike.RegexContext Text.Regex.TDFA.Common.Regex Data.Text.Internal.Text Data.Text.Internal.Text instance Text.Regex.Base.RegexLike.RegexMaker Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption Data.Text.Internal.Text instance Text.Regex.Base.RegexLike.RegexLike Text.Regex.TDFA.Common.Regex Data.Text.Internal.Text -- | This modules provides RegexMaker and RegexLike instances -- for using Text with the TDFA backend -- (Text.Regex.TDFA.NewDFA.Engine and -- Text.Regex.TDFA.NewDFA.Tester). 
-- -- This exports instances of the high level API and the medium level API -- of compile,execute, and regexec. module Text.Regex.TDFA.Text.Lazy -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. data CompOption data ExecOption compile :: CompOption -> ExecOption -> Text -> Either String Regex execute :: Regex -> Text -> Either String (Maybe MatchArray) regexec :: Regex -> Text -> Either String (Maybe (Text, Text, Text, [Text])) instance Text.Regex.Base.RegexLike.RegexContext Text.Regex.TDFA.Common.Regex Data.Text.Internal.Lazy.Text Data.Text.Internal.Lazy.Text instance Text.Regex.Base.RegexLike.RegexMaker Text.Regex.TDFA.Common.Regex Text.Regex.TDFA.Common.CompOption Text.Regex.TDFA.Common.ExecOption Data.Text.Internal.Lazy.Text instance Text.Regex.Base.RegexLike.RegexLike Text.Regex.TDFA.Common.Regex Data.Text.Internal.Lazy.Text -- | The Text.Regex.TDFA module provides a backend for regular -- expressions. It provides instances for the classes defined and -- documented in Text.Regex.Base and re-exported by this module. -- If you import this along with other backends then you should do so -- with qualified imports (with renaming for convenience). -- -- This regex-tdfa package implements, correctly, POSIX extended regular -- expressions. It is highly unlikely that the regex-posix -- package on your operating system is correct, see -- http://www.haskell.org/haskellwiki/Regex_Posix for examples of -- your OS's bugs. -- --

Importing and using

-- -- Declare a dependency on the regex-tdfa library in your -- .cabal file: -- --
--   build-depends: regex-tdfa ^>= 1.3.2
--   
-- -- In Haskell modules where you want to use regexes simply -- import this module: -- --
--   import Text.Regex.TDFA
--   
-- --

Basics

-- --
--   >>> let emailRegex = "[a-zA-Z0-9+._-]+\[-a-zA-Z]+\.[a-z]+"
--   >>> "my email is first-name.lastname_1974e-mail.com" =~ emailRegex :: Bool
--   True
--   
--   >>> "invalidmailcom" =~ emailRegex :: Bool
--   False
--   
--   >>> "invalidmail.COM" =~ emailRegex :: Bool
--   False
--   
--   >>> "#invalid.com" =~ emailRegex :: Bool
--   False
--   
--   -- non-monadic
--   λ> <to-match-against> =~ <regex>
--   
--   -- monadic, uses fail on lack of match
--   λ> <to-match-against> =~~ <regex>
--   
-- -- (=~) and (=~~) are polymorphic in their return type. -- This is so that regex-tdfa can pick the most efficient way to give you -- your result based on what you need. For instance, if all you want is -- to check whether the regex matched or not, there's no need to allocate -- a result string. If you only want the first match, rather than all the -- matches, then the matching engine can stop after finding a single hit. -- -- This does mean, though, that you may sometimes have to explicitly -- specify the type you want, especially if you're trying things out at -- the REPL. -- --
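-- For example (an added sketch reusing a string from the examples below):
-- run at the Maybe monad, (=~~) turns a failed match into Nothing instead
-- of calling error, and the annotation picks the result type.
-- --
--   >>> "alexis-de-tocqueville" =~~ "[a-z]+" :: Maybe String
--   Just "alexis"
--   
--   >>> "alexis-de-tocqueville" =~~ "[0-9]+" :: Maybe String
--   Nothing
--   
-- --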

Common use cases

-- --

Get the first match

-- --
--   -- returns empty string if no match
--   a =~ b :: String  -- or ByteString, or Text...
--   
--   >>> "alexis-de-tocqueville" =~ "[a-z]+" :: String
--   "alexis"
--   
--   >>> "alexis-de-tocqueville" =~ "[0-9]+" :: String
--   ""
--   
-- --

Check if it matched at all

-- --
--   a =~ b :: Bool
--   
--   >>> "alexis-de-tocqueville" =~ "[a-z]+" :: Bool
--   True
--   
-- --

Get first match + text before/after

-- --
--   -- if no match, will just return whole
--   -- string in the first element of the tuple
--   a =~ b :: (String, String, String)
--   
--   >>> "alexis-de-tocqueville" =~ "de" :: (String, String, String)
--   ("alexis-","de","-tocqueville")
--   
--   >>> "alexis-de-tocqueville" =~ "kant" :: (String, String, String)
--   ("alexis-de-tocqueville","","")
--   
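-- --
-- A further added sketch, using the MatchOffset and MatchLength types from
-- the re-exported regex-base API: the operator can also report where the
-- first match sits, with offset (-1) when nothing matches.
-- --
--   >>> "alexis-de-tocqueville" =~ "de" :: (MatchOffset, MatchLength)
--   (7,2)
--   
--   >>> "alexis-de-tocqueville" =~ "kant" :: (MatchOffset, MatchLength)
--   (-1,0)
--   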
-- --

Get first match + submatches

-- --
--   -- same as above, but also returns a list of just submatches.
--   -- submatch list is empty if regex doesn't match at all
--   a =~ b :: (String, String, String, [String])
--   
--   >>> "div[attr=1234]" =~ "div\[([a-z]+)=([^]]+)\]" :: (String, String, String, [String])
--   ("","div[attr=1234]","",["attr","1234"])
--   
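-- --
-- Relatedly (an added sketch using getAllTextSubmatches from the
-- re-exported regex-base API), the full match and its submatches can be
-- returned together as one flat list.
-- --
--   >>> getAllTextSubmatches ("div[attr=1234]" =~ "div\\[([a-z]+)=([^]]+)\\]") :: [String]
--   ["div[attr=1234]","attr","1234"]
--   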
-- --

Get all matches

-- --
--   -- can also return Data.Array instead of List
--   getAllTextMatches (a =~ b) :: [String]
--   
--   >>> getAllTextMatches ("john anne yifan" =~ "[a-z]+") :: [String]
--   ["john","anne","yifan"]
--   
--   >>> getAllTextMatches ("* - . a + z" =~ "[--z]+") :: [String]
--   ["-",".","a","z"]
--   
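-- --
-- As the comment above notes, other result shapes are available; for
-- example (an added sketch), getAllMatches from the re-exported regex-base
-- API reports every match as an (offset, length) pair.
-- --
--   >>> getAllMatches ("john anne yifan" =~ "[a-z]+") :: [(MatchOffset, MatchLength)]
--   [(0,4),(5,4),(10,5)]
--   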
-- --

Feature support

-- --
-- This package does provide captured parenthesized subexpressions.
--
-- Depending on the text being searched this package supports Unicode. The
-- [Char], Text, Text.Lazy, and (Seq Char) text types support
-- Unicode. The ByteString and ByteString.Lazy text types only
-- support ASCII.
--
-- As of version 1.1.1 the following GNU extensions are recognized, all
-- anchors:
--
--   * \` at the beginning of the entire text
--   * \' at the end of the entire text
--   * \< at the beginning of a word
--   * \> at the end of a word
--   * \b at either the beginning or the end of a word
--   * \B at neither the beginning nor the end of a word
--
-- The above are controlled by the newSyntax Bool in CompOption.
--
-- Here a "word" boundary means a position between a character that is in
-- the [:word:] character class (which contains [a-zA-Z0-9_]) and one that
-- is not. Note that \< and \b may match before the entire text, and \> and
-- \b may match at the end of the entire text.
--
-- There is no locale support, so collating elements like [.ch.] are simply
-- ignored and equivalence classes like [=a=] are converted to just [a].
-- The character classes like [:alnum:] are supported over ASCII only;
-- valid classes are alnum, digit, punct, alpha, graph, space, blank,
-- lower, upper, cntrl, print, xdigit, word.
-- --
--   >>> getAllTextMatches ("john anne yifan" =~ "[[:lower:]]+") :: [String]
--   ["john","anne","yifan"]
--   
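-- --
-- For example (an added sketch; defaultCompOpt leaves newSyntax set to
-- True, so the word-boundary anchors listed above are available through
-- (=~)):
-- --
--   >>> "the cat sat" =~ "\\bcat\\b" :: Bool
--   True
--   
--   >>> "concatenate" =~ "\\bcat\\b" :: Bool
--   False
--   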
-- -- This package does not provide "basic" regular expressions. This -- package does not provide back references inside regular expressions. -- -- The package does not provide Perl style regular expressions. Please -- look at the regex-pcre and pcre-light packages instead. -- -- This package does not provide find-and-replace. -- --
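-- As a minimal illustrative sketch only (the helper replaceFirst below is
-- made up for this example and is not part of this package), a single
-- find-and-replace can be built on top of the (before, match, after)
-- context shown earlier:
-- --
--   -- hypothetical helper, not exported by regex-tdfa
--   replaceFirst :: String -> String -> String -> String
--   replaceFirst pat replacement haystack
--     | (haystack =~ pat :: Bool) =
--         let (before, _match, after) =
--               haystack =~ pat :: (String, String, String)
--         in  before ++ replacement ++ after
--     | otherwise = haystack
--   
--   -- λ> replaceFirst "de" "DE" "alexis-de-tocqueville"
--   -- "alexis-DE-tocqueville"
--   
-- --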

Avoiding backslashes

-- -- If you find yourself writing a lot of regexes, take a look at -- raw-strings-qq. It'll let you write regexes without needing to -- escape all your backslashes. -- --
--   {-# LANGUAGE QuasiQuotes #-}
--   
--   import Text.RawString.QQ
--   import Text.Regex.TDFA
--   
--   λ> "2 * (3 + 1) / 4" =~ [r|\([^)]+\)|] :: String
--   "(3 + 1)"
--   
module Text.Regex.TDFA getVersion_Text_Regex_TDFA :: Version -- | This is the pure functional matching operator. If the target cannot be -- produced then some empty result will be returned. If there is an error -- in processing, then error will be called. (=~) :: (RegexMaker Regex CompOption ExecOption source, RegexContext Regex source1 target) => source1 -> source -> target -- | This is the monadic matching operator. If a single match fails, then -- fail will be called. (=~~) :: (RegexMaker Regex CompOption ExecOption source, RegexContext Regex source1 target, MonadFail m) => source1 -> source -> m target -- | The TDFA backend specific Regex type, used by this module's -- RegexOptions and RegexMaker. data Regex data ExecOption ExecOption :: Bool -> ExecOption -- | True by default. Set to False to improve speed (and space). [captureGroups] :: ExecOption -> Bool -- | Control whether the pattern is multiline or case-sensitive like -- Text.Regex and whether to capture the subgroups (\1, \2, etc). -- Controls enabling extra anchor syntax. data CompOption CompOption :: Bool -> Bool -> Bool -> Bool -> Bool -> CompOption -- | True in blankCompOpt and defaultCompOpt. [caseSensitive] :: CompOption -> Bool -- | False in blankCompOpt, True in defaultCompOpt. Compile -- for newline-sensitive matching. -- -- From regexp man page: "By default, newline is a completely -- ordinary character with no special meaning in either REs or strings. -- With this flag, inverted bracket expressions [^ and -- . never match newline, a ^ anchor matches the null -- string after any newline in the string in addition to its normal -- function, and the $ anchor matches the null string before any -- newline in the string in addition to its normal function." [multiline] :: CompOption -> Bool -- | True (and therefore right associative) in blankCompOpt and -- defaultCompOpt. [rightAssoc] :: CompOption -> Bool -- | False in blankCompOpt, True in defaultCompOpt. Enables -- the extended non-POSIX syntax described in Text.Regex.TDFA -- haddock documentation. [newSyntax] :: CompOption -> Bool -- | False by default. This is POSIX correct but it takes space and is -- slower. Setting this to True will improve performance, and should be -- done if you plan to set the captureGroups ExecOption to -- False. [lastStarGreedy] :: CompOption -> Bool