Copyright | (C) 2016-17 Chris Dornan |
---|---|
License | BSD3 (see the LICENSE file) |
Maintainer | Chris Dornan <chris.dornan@irisconnect.com> |
Stability | RFC |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
- data Matches a = Matches {
- matchesSource :: !a
- allMatches :: ![Match a]
- data Match a = Match {
- matchSource :: !a
- captureNames :: !CaptureNames
- matchArray :: !(Array CaptureOrdinal (Capture a))
- data Capture a = Capture {
- captureSource :: !a
- capturedText :: !a
- captureOffset :: !Int
- captureLength :: !Int
- noMatch :: a -> Match a
- anyMatches :: Matches a -> Bool
- countMatches :: Matches a -> Int
- matches :: Matches a -> [a]
- mainCaptures :: Matches a -> [Capture a]
- matched :: Match a -> Bool
- matchedText :: Match a -> Maybe a
- matchCapture :: Match a -> Maybe (Capture a)
- matchCaptures :: Match a -> Maybe (Capture a, [Capture a])
- (!$$) :: Match a -> CaptureID -> a
- captureText :: CaptureID -> Match a -> a
- (!$$?) :: Match a -> CaptureID -> Maybe a
- captureTextMaybe :: CaptureID -> Match a -> Maybe a
- (!$) :: Match a -> CaptureID -> Capture a
- capture :: CaptureID -> Match a -> Capture a
- (!$?) :: Match a -> CaptureID -> Maybe (Capture a)
- captureMaybe :: CaptureID -> Match a -> Maybe (Capture a)
- hasCaptured :: Capture a -> Bool
- capturePrefix :: Extract a => Capture a -> a
- captureSuffix :: Extract a => Capture a -> a
- class Replace s => IsRegex re s where
- data Options_ r c e = Options {
- optionsMacs :: !(Macros r)
- optionsComp :: !c
- optionsExec :: !e
- class IsOption o r c e | e -> r, c -> e, e -> c, r -> c, c -> r, r -> e where
- makeOptions :: o -> Options_ r c e
- newtype MacroID = MacroID {
- getMacroID :: String
- type Macros r = HashMap MacroID r
- emptyMacros :: Macros r
- data SimpleRegexOptions
- data CaptureID
- type CaptureNames = HashMap CaptureName CaptureOrdinal
- noCaptureNames :: CaptureNames
- newtype CaptureName = CaptureName {
- getCaptureName :: Text
- newtype CaptureOrdinal = CaptureOrdinal {}
- findCaptureID :: CaptureID -> CaptureNames -> Int
- data Edits m re s
- data Edit m s
- data LineEdit s
- = NoEdit
- | ReplaceWith s
- | Delete
- applyEdits :: (IsRegex re s, Monad m, Functor m) => LineNo -> Edits m re s -> s -> m s
- applyEdit :: (IsRegex re s, Monad m, Functor m) => (s -> s) -> LineNo -> re -> Edit m s -> s -> m (Maybe s)
- applyLineEdit :: Monoid s => (s -> s) -> LineEdit s -> Maybe s
- newtype LineNo = ZeroBasedLineNo {}
- firstLine :: LineNo
- getLineNo :: LineNo -> Int
- lineNo :: Int -> LineNo
- parseInteger :: Replace a => a -> Maybe Int
- parseHex :: Replace a => a -> Maybe Int
- parseDouble :: Replace a => a -> Maybe Double
- parseString :: Replace a => a -> Maybe Text
- parseSimpleString :: Replace a => a -> Maybe Text
- parseDate :: Replace a => a -> Maybe Day
- parseSlashesDate :: Replace a => a -> Maybe Day
- parseTimeOfDay :: Replace a => a -> Maybe TimeOfDay
- parseTimeZone :: Replace a => a -> Maybe TimeZone
- parseDateTime :: Replace a => a -> Maybe UTCTime
- parseDateTime8601 :: Replace a => a -> Maybe UTCTime
- parseDateTimeCLF :: Replace a => a -> Maybe UTCTime
- parseShortMonth :: Replace a => a -> Maybe Int
- shortMonthArray :: Array Int Text
- type IPV4Address = (Word8, Word8, Word8, Word8)
- parseIPv4Address :: Replace a => a -> Maybe IPV4Address
- data Severity
- parseSeverity :: Replace a => a -> Maybe Severity
- severityKeywords :: Severity -> (Text, [Text])
- class (Extract a, Monoid a) => Replace a where
- data ReplaceMethods a = ReplaceMethods {
- methodLength :: a -> Int
- methodSubst :: (a -> a) -> Capture a -> a
- replaceMethods :: Replace a => ReplaceMethods a
- data Context
- data Location = Location {}
- isTopLocation :: Location -> Bool
- replace :: Replace a => Match a -> a -> a
- replaceAll :: Replace a => a -> Matches a -> a
- replaceAllCaptures :: Replace a => Context -> (Match a -> Location -> Capture a -> Maybe a) -> Matches a -> a
- replaceAllCaptures_ :: Extract a => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> Maybe a) -> Matches a -> a
- replaceAllCapturesM :: (Extract a, Monad m) => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> m (Maybe a)) -> Matches a -> m a
- replaceCaptures :: Replace a => Context -> (Match a -> Location -> Capture a -> Maybe a) -> Match a -> a
- replaceCaptures_ :: Extract a => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> Maybe a) -> Match a -> a
- replaceCapturesM :: (Monad m, Extract a) => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> m (Maybe a)) -> Match a -> m a
- expandMacros :: (r -> String) -> Macros r -> String -> String
- expandMacros' :: (MacroID -> Maybe String) -> String -> String
- data Line = Line {
- getLineNumber :: LineNo
- getLineMatches :: Matches ByteString
- grep :: IsRegex re ByteString => re -> FilePath -> IO ()
- grepLines :: IsRegex re ByteString => re -> FilePath -> IO [Line]
- type GrepScript re s t = [(re, LineNo -> Matches s -> Maybe t)]
- grepScript :: IsRegex re s => GrepScript re s t -> [s] -> [t]
- linesMatched :: [Line] -> [Line]
- alex :: IsRegex re s => [(re, Match s -> Maybe t)] -> t -> s -> [t]
- alex' :: Replace s => (re -> s -> Match s) -> [(re, Match s -> Maybe t)] -> t -> s -> [t]
- type SedScript re = Edits IO re ByteString
- sed :: IsRegex re ByteString => SedScript re -> FilePath -> FilePath -> IO ()
- sed' :: (IsRegex re ByteString, Monad m, Functor m) => Edits m re ByteString -> ByteString -> m ByteString
Tutorial
We have a regex tutorial at http://tutorial.regex.uk. These API docs are mainly for reference.
How to use this library
This module won't provide any operators to match a regular expression against text as it merely provides the toolkit for working with the output of the match operators. You probably won't import it directly but import one of the modules that provides the match operators, which will in tuen re-export this module.
The module that you choose to import will depend upon two factors:
- Which flavour of regular expression do you want to use? If you want Posix flavour REs then you want the TDFA modules, otherwise its PCRE for Perl-style REs.
- What type of text do you want to match: (slow)
String
s,ByteString
,ByteString.Lazy
,Text
,Text.Lazy
or the anachronisticSeq Char
or indeed a good old-fashioned polymorphic operators?
While we aim to provide all combinations of these choices, some of them are currently not available. We have:
The Match Operators
The traditional =~
and =~~
operators are exported by the regex
,
but we recommend that you use the two new operators, especially if
you are not familiar with the old operators. We have:
txt ?=~ re
searches for a single match yielding a value of typeMatch
a
wherea
is the type of the text you are searching.txt *=~ re
searches for all non-overlapping matches intxt
, returning a value of typeMatches
a
.
See the sections below for more information on these Matches
and
Match
result types.
Matches, Match & Capture
the result type to use when every match is needed, not just the first match of the RE against the source
Matches | |
|
the result of matching a RE to a text once, listing the text that was matched and the named captures in the RE and all of the substrings matched, with the text captured by the whole RE; a complete failure to match will be represented with an empty array (with bounds (0,-1))
Match | |
|
the matching of a single sub-expression against part of the source text
Capture | |
|
Matches functions
anyMatches :: Matches a -> Bool Source
tests whether the RE matched the source text at all
countMatches :: Matches a -> Int Source
count the matches
mainCaptures :: Matches a -> [Capture a] Source
extract the main capture from each match
Match functions
matchedText :: Match a -> Maybe a Source
tests whether the RE matched the source text at all
matchCapture :: Match a -> Maybe (Capture a) Source
the top-level capture if the source text matched the RE, Nothing otherwise
matchCaptures :: Match a -> Maybe (Capture a, [Capture a]) Source
the top-level capture and the sub captures if the text matched the RE, Nothing otherwise
captureText :: CaptureID -> Match a -> a Source
look up the text of the nth capture, 0 being the match of the whole RE against the source text, 1, the first bracketed sub-expression to be matched and so on
captureTextMaybe :: CaptureID -> Match a -> Maybe a Source
look up the text of the nth capture (0 being the match of the whole), returning Nothing if the Match doesn't contain the capture
capture :: CaptureID -> Match a -> Capture a Source
look up the nth capture, 0 being the match of the whole RE against the source text, 1, the first bracketed sub-expression to be matched and so on
captureMaybe :: CaptureID -> Match a -> Maybe (Capture a) Source
look up the nth capture, 0 being the match of the whole RE against the source text, 1, the first bracketed sub-expression to be matched and so on, returning Nothing if there is no such capture, or if the capture failed to capture anything (being in a failed alternate)
Capture functions
hasCaptured :: Capture a -> Bool Source
test if the capture has matched any text
capturePrefix :: Extract a => Capture a -> a Source
returns the text preceding the match
captureSuffix :: Extract a => Capture a -> a Source
returns the text after the match
IsRegex
Options
Options | |
|
class IsOption o r c e | e -> r, c -> e, e -> c, r -> c, c -> r, r -> e where Source
makeOptions :: o -> Options_ r c e Source
emptyMacros :: Macros r Source
data SimpleRegexOptions Source
CaptureID
CaptureID identifies captures, either by number (e.g., [cp|1|]) or name (e.g., [cp|foo|]).
type CaptureNames = HashMap CaptureName CaptureOrdinal Source
the dictionary for named captures stored in compiled regular expressions associates
noCaptureNames :: CaptureNames Source
an empty CaptureNames
dictionary
newtype CaptureName Source
a CaptureName
is just the text of the name
CaptureName | |
|
Eq CaptureName | |
Ord CaptureName | |
Show CaptureName | |
Hashable CaptureName |
newtype CaptureOrdinal Source
a CaptureOrdinal
is just the number of the capture, starting
with 0 for the whole of the text matched, then in leftmost,
outermost
findCaptureID :: CaptureID -> CaptureNames -> Int Source
look up a CaptureID
in the CaptureNames
dictionary
Edit
applyEdit :: (IsRegex re s, Monad m, Functor m) => (s -> s) -> LineNo -> re -> Edit m s -> s -> m (Maybe s) Source
applyLineEdit :: Monoid s => (s -> s) -> LineEdit s -> Maybe s Source
LineNo
Parsers
parseInteger :: Replace a => a -> Maybe Int Source
parseDouble :: Replace a => a -> Maybe Double Source
parseString :: Replace a => a -> Maybe Text Source
parseSimpleString :: Replace a => a -> Maybe Text Source
parseSlashesDate :: Replace a => a -> Maybe Day Source
parseTimeOfDay :: Replace a => a -> Maybe TimeOfDay Source
parseTimeZone :: Replace a => a -> Maybe TimeZone Source
parseDateTime :: Replace a => a -> Maybe UTCTime Source
parseDateTime8601 :: Replace a => a -> Maybe UTCTime Source
parseDateTimeCLF :: Replace a => a -> Maybe UTCTime Source
parseShortMonth :: Replace a => a -> Maybe Int Source
shortMonthArray :: Array Int Text Source
parseIPv4Address :: Replace a => a -> Maybe IPV4Address Source
parseSeverity :: Replace a => a -> Maybe Severity Source
severityKeywords :: Severity -> (Text, [Text]) Source
Replace
class (Extract a, Monoid a) => Replace a where Source
Replace provides the missing methods needed to replace the matched text; lengthE is the minimum implementation
length function for a
inject String into a
project a onto a String
inject into Text
detextifyE :: Text -> a Source
project Text onto a
appendNewlineE :: a -> a Source
append a newline
substE :: (a -> a) -> Capture a -> a Source
apply a substitution function to a Capture
parseTemplateE :: a -> Match a -> Location -> Capture a -> Maybe a Source
convert a template containing $0, $1, etc., in the first
argument, into a phi
replacement function for use with
replaceAllCaptures and replaceCaptures
data ReplaceMethods a Source
a selction of the Replace methods can be encapsulated with ReplaceMethods for the higher-order replacement functions
ReplaceMethods | |
|
replaceMethods :: Replace a => ReplaceMethods a Source
replaceMethods encapsulates ReplaceMethods a from a Replace a context
Context
specifies which contexts the substitutions should be applied
the Location
information passed into the substitution function
specifies which sub-expression is being substituted
Location | |
|
isTopLocation :: Location -> Bool Source
True iff the location references a complete match (i.e., not a bracketed capture)
replace :: Replace a => Match a -> a -> a Source
replace with a template containing $0 for whole text, $1 for first capture, etc.
replaceAll :: Replace a => a -> Matches a -> a Source
replace all with a template, $0 for whole text, $1 for first capture, etc.
replaceAllCaptures :: Replace a => Context -> (Match a -> Location -> Capture a -> Maybe a) -> Matches a -> a Source
substitutes using a function that takes the full Match context and returns the same replacement text as the _phi_phi context.
replaceAllCaptures_ :: Extract a => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> Maybe a) -> Matches a -> a Source
replaceAllCaptures_ is like like replaceAllCaptures but takes the Replace methods through the ReplaceMethods argument
replaceAllCapturesM :: (Extract a, Monad m) => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> m (Maybe a)) -> Matches a -> m a Source
replaceAllCapturesM is just a monadically generalised version of replaceAllCaptures_
replaceCaptures :: Replace a => Context -> (Match a -> Location -> Capture a -> Maybe a) -> Match a -> a Source
substitutes using a function that takes the full Match context and returns the same replacement text as the _phi_phi context.
replaceCaptures_ :: Extract a => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> Maybe a) -> Match a -> a Source
replaceCaptures_ is like replaceCaptures but takes the Replace methods through the ReplaceMethods argument
replaceCapturesM :: (Monad m, Extract a) => ReplaceMethods a -> Context -> (Match a -> Location -> Capture a -> m (Maybe a)) -> Match a -> m a Source
replaceCapturesM is just a monadically generalised version of replaceCaptures_
expandMacros :: (r -> String) -> Macros r -> String -> String Source
expand all of the @{..} macros in the RE in the argument String according to the Macros argument, preprocessing the RE String according to the Mode argument (used internally)
expandMacros' :: (MacroID -> Maybe String) -> String -> String Source
expand the @{..} macos in the argument string using the given function
Tools
Grep
type GrepScript re s t = [(re, LineNo -> Matches s -> Maybe t)] Source
grepScript :: IsRegex re s => GrepScript re s t -> [s] -> [t] Source
linesMatched :: [Line] -> [Line] Source