module Text.Regex.PCRE.Wrap(
Regex,
CompOption(CompOption),
ExecOption(ExecOption),
(=~),
(=~~),
StartOffset,
EndOffset,
ReturnCode(ReturnCode),
WrapError,
wrapCompile,
wrapTest,
wrapMatch,
wrapMatchAll,
wrapCount,
getVersion,
configUTF8,
getNumSubs,
unusedOffset,
compBlank,
compAnchored,
compAutoCallout,
compCaseless,
compDollarEndOnly,
compDotAll,
compExtended,
compExtra,
compFirstLine,
compMultiline,
compNoAutoCapture,
compUngreedy,
compUTF8,
compNoUTF8Check,
execBlank,
execAnchored,
execNotBOL,
execNotEOL,
execNotEmpty,
execNoUTF8Check,
execPartial,
retOk,
retNoMatch,
retNull,
retBadOption,
retBadMagic,
retUnknownNode,
retNoMemory,
retNoSubstring
) where
import Control.Monad(when)
import Data.Array(Array,accumArray)
import Data.Bits(Bits((.|.)))
import System.IO.Unsafe(unsafePerformIO)
import Foreign(Ptr,ForeignPtr,FinalizerPtr
,alloca,allocaBytes,nullPtr
,peek,peekElemOff
,newForeignPtr,withForeignPtr)
import Foreign.C(CInt(..),CChar)
import Foreign.C.String(CString,CStringLen,peekCString)
import Text.Regex.Base.RegexLike(RegexOptions(..),RegexMaker(..),RegexContext(..),MatchArray,MatchOffset)
getVersion :: Maybe String
type PCRE = ()
type StartOffset = MatchOffset
type EndOffset = MatchOffset
type WrapError = (ReturnCode,String)
newtype CompOption = CompOption CInt deriving (Eq,Show,Num,Bits)
newtype ExecOption = ExecOption CInt deriving (Eq,Show,Num,Bits)
newtype ReturnCode = ReturnCode CInt deriving (Eq,Show)
data Regex = Regex (ForeignPtr PCRE) CompOption ExecOption
compBlank :: CompOption
execBlank :: ExecOption
unusedOffset :: MatchOffset
retOk :: ReturnCode
wrapCompile :: CompOption
-> ExecOption
-> CString
-> IO (Either (MatchOffset,String) Regex)
wrapTest :: StartOffset
-> Regex
-> CStringLen
-> IO (Either WrapError Bool)
wrapMatch :: StartOffset
-> Regex
-> CStringLen
-> IO (Either WrapError (Maybe [(StartOffset,EndOffset)]))
wrapMatchAll :: Regex -> CStringLen -> IO (Either WrapError [ MatchArray ])
wrapCount :: Regex -> CStringLen -> IO (Either WrapError Int)
getNumSubs :: Regex -> Int
configUTF8 :: Bool
(=~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target)
=> source1 -> source -> target
(=~~) :: (RegexMaker Regex CompOption ExecOption source,RegexContext Regex source1 target,Monad m)
=> source1 -> source -> m target
instance RegexOptions Regex CompOption ExecOption where
blankCompOpt = compBlank
blankExecOpt = execBlank
defaultCompOpt = compMultiline
defaultExecOpt = execBlank
setExecOpts e' (Regex r c _) = Regex r c e'
getExecOpts (Regex _ _ e) = e
(=~) x r = let q :: Regex
q = makeRegex r
in match q x
(=~~) x r = do (q :: Regex) <- makeRegexM r
matchM q x
type PCRE_Extra = ()
fi :: (Integral i,Num n ) => i -> n
fi x = fromIntegral x
compBlank = CompOption 0
execBlank = ExecOption 0
unusedOffset = (1)
retOk = ReturnCode 0
retNeededMoreSpace :: ReturnCode
retNeededMoreSpace = ReturnCode 0
newtype InfoWhat = InfoWhat CInt deriving (Eq,Show)
newtype ConfigWhat = ConfigWhat CInt deriving (Eq,Show)
nullTest' :: Ptr a -> String -> IO (Either (MatchOffset,String) b) -> IO (Either (MatchOffset,String) b)
nullTest' ptr msg io = do
if nullPtr == ptr
then return (Left (0,"Ptr parameter was nullPtr in Text.Regex.PCRE.Wrap."++msg))
else io
nullTest :: Ptr a -> String -> IO (Either WrapError b) -> IO (Either WrapError b)
nullTest ptr msg io = do
if nullPtr == ptr
then return (Left (retOk,"Ptr parameter was nullPtr in Text.Regex.PCRE.Wrap."++msg))
else io
wrapRC :: ReturnCode -> IO (Either WrapError b)
wrapRC r = return (Left (r,"Error in Text.Regex.PCRE.Wrap: "++show r))
wrapCompile flags e pattern = do
nullTest' pattern "wrapCompile pattern" $ do
alloca $ \errOffset -> alloca $ \errPtr -> do
nullTest' errPtr "wrapCompile errPtr" $ do
pcre_ptr <- c_pcre_compile pattern flags errPtr errOffset nullPtr
if pcre_ptr == nullPtr
then do
offset <- peek errOffset
string <- peekCString =<< peek errPtr
return (Left (fi offset,string))
else do regex <- newForeignPtr c_ptr_free pcre_ptr
return . Right $ Regex regex flags e
getNumSubs (Regex pcre_fptr _ _) = fi . unsafePerformIO $ withForeignPtr pcre_fptr getNumSubs'
getNumSubs' :: Ptr PCRE -> IO CInt
getNumSubs' pcre_ptr =
alloca $ \st -> do
when (st == nullPtr) (fail "Text.Regex.PCRE.Wrap.getNumSubs' could not allocate a CInt!!!")
c_pcre_fullinfo pcre_ptr nullPtr pcreInfoCapturecount st
peek st
wrapTest startOffset (Regex pcre_fptr _ flags) (cstr,len) = do
nullTest cstr "wrapTest cstr" $ do
withForeignPtr pcre_fptr $ \pcre_ptr -> do
r@(ReturnCode r') <- c_pcre_exec pcre_ptr nullPtr cstr (fi len) (fi startOffset) flags nullPtr 0
if r == retNoMatch
then return (Right False)
else if r' < 0
then wrapRC r
else return (Right True)
wrapMatch startOffset (Regex pcre_fptr _ flags) (cstr,len) = do
nullTest cstr "wrapMatch cstr" $ do
withForeignPtr pcre_fptr $ \pcre_ptr -> do
nsub <- getNumSubs' pcre_ptr
let nsub_int :: Int
nsub_int = fi nsub
ovec_size :: CInt
ovec_size = ((nsub + 1) * 3)
ovec_bytes :: Int
ovec_bytes = (fi ovec_size) * (4)
allocaBytes ovec_bytes $ \ovec -> do
nullTest ovec "wrapMatch ovec" $ do
r@(ReturnCode r') <- c_pcre_exec pcre_ptr nullPtr cstr (fi len) (fi startOffset) flags ovec ovec_size
if r == retNoMatch
then return (Right Nothing)
else if r' < 0
then wrapRC r
else do
let pairsSet :: Int
pairsSet = if r == retNeededMoreSpace
then nsub_int + 1
else fi r'
extraPairs :: [(Int,Int)]
extraPairs = replicate (nsub_int + 1 pairsSet)
(unusedOffset,unusedOffset)
pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0 .. ((pairsSet*2)1)]
return . Right . Just $ (pairs ++ extraPairs)
wrapMatchAll (Regex pcre_fptr _ flags) (cstr,len) = do
nullTest cstr "wrapMatchAll cstr" $ do
withForeignPtr pcre_fptr $ \regex -> do
nsub <- getNumSubs' regex
let nsub_int :: Int
nsub_int = fi nsub
ovec_size :: CInt
ovec_size = ((nsub + 1) * 3)
ovec_bytes :: Int
ovec_bytes = (fi ovec_size) * (4)
clen = fi len
flags' = (execNotEmpty .|. execAnchored .|. flags)
allocaBytes ovec_bytes $ \ovec ->
nullTest ovec "wrapMatchAll ovec" $
let loop acc flags_in_use pos = do
r@(ReturnCode r') <- c_pcre_exec regex nullPtr cstr clen (fi pos) flags_in_use ovec ovec_size
if r == retNoMatch
then return (Right (acc []))
else if r' < 0
then wrapRC r
else do
let pairsSet = if r == retNeededMoreSpace then nsub_int+1 else fi r'
pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0 .. ((pairsSet*2)1)]
let acc' = acc . (toMatchArray nsub_int pairs:)
case pairs of
[] -> return (Right (acc' []))
((s,e):_) | s==e -> if s == len
then return (Right (acc' []))
else loop acc' flags' e
| otherwise -> loop acc' flags e
in loop id flags 0
toMatchArray :: Int -> [(Int,Int)] -> Array Int (Int,Int)
toMatchArray n pairs = accumArray (\_ (s,e) -> (s,(es))) (1,0) (0,n) (zip [0..] pairs)
toPairs :: [CInt] -> [(Int,Int)]
toPairs [] = []
toPairs (a:b:rest) = (fi a,fi b):toPairs rest
toPairs [_] = error "Should not have just one element in WrapPCRE.wrapMatchAll.toPairs"
wrapCount (Regex pcre_fptr _ flags) (cstr,len) = do
nullTest cstr "wrapCount cstr" $ do
withForeignPtr pcre_fptr $ \pcre_ptr -> do
nsub <- getNumSubs' pcre_ptr
let ovec_size :: CInt
ovec_size = ((nsub + 1) * 3)
ovec_bytes :: Int
ovec_bytes = (fi ovec_size) * (4)
clen = fi len
allocaBytes ovec_bytes $ \ovec ->
nullTest ovec "wrapCount ovec" $
let act pos = c_pcre_exec pcre_ptr nullPtr cstr clen (fi pos) flags ovec ovec_size
loop acc pos | acc `seq` pos `seq` False = undefined
| otherwise = do
r@(ReturnCode r') <- act pos
if r == retNoMatch
then return (Right acc)
else if r' < 0
then wrapRC r
else do
pairs <- return . toPairs =<< mapM (peekElemOff ovec) [0,1]
case pairs of
[] -> return (Right (succ acc))
((s,e):_) | s==e -> return (Right (succ acc))
| otherwise -> loop (succ acc) e
in loop 0 0
getVersion = unsafePerformIO $ do
version <- c_pcre_version
if version == nullPtr
then return (Just "pcre_version was null")
else return . Just =<< peekCString version
configUTF8 = unsafePerformIO $
alloca $ \ptrVal -> do
when (ptrVal == nullPtr) (fail "Text.Regex.PCRE.Wrap.configUTF8 could not alloca CInt!!!")
c_pcre_config pcreConfigUtf8 ptrVal
val <- peek ptrVal
case val of
(1 :: CInt) -> return True
0 -> return False
_ -> return False
foreign import ccall unsafe "pcre.h pcre_compile"
c_pcre_compile :: CString -> CompOption -> Ptr CString -> Ptr CInt -> CString -> IO (Ptr PCRE)
foreign import ccall unsafe "&free"
c_ptr_free :: FinalizerPtr a
foreign import ccall unsafe "pcre.h pcre_exec"
c_pcre_exec :: Ptr PCRE -> Ptr PCRE_Extra -> CString -> CInt -> CInt -> ExecOption -> Ptr CInt -> CInt -> IO ReturnCode
foreign import ccall unsafe "pcre.h pcre_fullinfo"
c_pcre_fullinfo :: Ptr PCRE -> Ptr PCRE_Extra -> InfoWhat -> Ptr a -> IO CInt
foreign import ccall unsafe "pcre.h pcre_version"
c_pcre_version :: IO (Ptr CChar)
foreign import ccall unsafe "pcre.h pcre_config"
c_pcre_config :: ConfigWhat -> Ptr a -> IO CInt
compAnchored :: CompOption
compAnchored = CompOption 16
compAutoCallout :: CompOption
compAutoCallout = CompOption 16384
compCaseless :: CompOption
compCaseless = CompOption 1
compDollarEndOnly :: CompOption
compDollarEndOnly = CompOption 32
compDotAll :: CompOption
compDotAll = CompOption 4
compExtended :: CompOption
compExtended = CompOption 8
compExtra :: CompOption
compExtra = CompOption 64
compFirstLine :: CompOption
compFirstLine = CompOption 262144
compMultiline :: CompOption
compMultiline = CompOption 2
compNoAutoCapture :: CompOption
compNoAutoCapture = CompOption 4096
compUngreedy :: CompOption
compUngreedy = CompOption 512
compUTF8 :: CompOption
compUTF8 = CompOption 2048
compNoUTF8Check :: CompOption
compNoUTF8Check = CompOption 8192
execAnchored :: ExecOption
execAnchored = ExecOption 16
execNotBOL :: ExecOption
execNotBOL = ExecOption 128
execNotEOL :: ExecOption
execNotEOL = ExecOption 256
execNotEmpty :: ExecOption
execNotEmpty = ExecOption 1024
execNoUTF8Check :: ExecOption
execNoUTF8Check = ExecOption 8192
execPartial :: ExecOption
execPartial = ExecOption 32768
retNoMatch :: ReturnCode
retNoMatch = ReturnCode (1)
retNull :: ReturnCode
retNull = ReturnCode (2)
retBadOption :: ReturnCode
retBadOption = ReturnCode (3)
retBadMagic :: ReturnCode
retBadMagic = ReturnCode (4)
retUnknownNode :: ReturnCode
retUnknownNode = ReturnCode (5)
retNoMemory :: ReturnCode
retNoMemory = ReturnCode (6)
retNoSubstring :: ReturnCode
retNoSubstring = ReturnCode (7)
pcreInfoCapturecount :: InfoWhat
pcreInfoCapturecount = InfoWhat 2
pcreConfigUtf8 :: ConfigWhat
pcreConfigUtf8 = ConfigWhat 0