{-# OPTIONS_GHC -fno-warn-orphans #-}
module Text.Regex.PCRE.Text.Lazy
(
Regex
, MatchOffset
, MatchLength
, CompOption(CompOption)
, ExecOption(ExecOption)
, ReturnCode
, WrapError
, unusedOffset
, getVersion
, compile
, execute
, regexec
, compBlank
, compAnchored
, compAutoCallout
, compCaseless
, compDollarEndOnly
, compDotAll
, compExtended
, compExtra
, compFirstLine
, compMultiline
, compNoAutoCapture
, compUngreedy
, compUTF8
, compNoUTF8Check
, execBlank
, execAnchored
, execNotBOL
, execNotEOL
, execNotEmpty
, execNoUTF8Check
, execPartial
) where
import Data.Array(Array,listArray)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import Foreign.C.String(CStringLen,CString)
import Foreign(nullPtr)
import System.IO.Unsafe (unsafePerformIO)
import Text.Regex.Base.Impl
import Text.Regex.Base.RegexLike
import Text.Regex.PCRE.Wrap
instance RegexContext Regex TL.Text TL.Text where
match = polymatch
matchM = polymatchM
instance RegexMaker Regex CompOption ExecOption TL.Text where
makeRegexOpts c e pat = unsafePerformIO $
compile c e pat >>= unwrap
makeRegexOptsM c e pat = either (fail.show) return $ unsafePerformIO $
compile c e pat
instance RegexLike Regex TL.Text where
matchTest re tx = unsafePerformIO $
asCStringLen tx (wrapTest 0 re) >>= unwrap
matchOnce re tx = unsafePerformIO $
execute re tx >>= unwrap
matchAll re tx = unsafePerformIO $
asCStringLen tx (wrapMatchAll re) >>= unwrap
matchCount re tx = unsafePerformIO $
asCStringLen tx (wrapCount re) >>= unwrap
compile :: CompOption
-> ExecOption
-> TL.Text
-> IO (Either (MatchOffset,String) Regex)
compile c e pat =
asCString pat $ wrapCompile c e
execute :: Regex
-> TL.Text
-> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
execute re tx = do
maybeStartEnd <- asCStringLen tx $ wrapMatch 0 re
case maybeStartEnd of
Right Nothing -> return $ Right Nothing
Right (Just parts) ->
return $ Right $ Just $ listArray (0,pred $ length parts)
[ (s,e-s) | (s,e) <- parts ]
Left err -> return $ Left err
regexec :: Regex
-> TL.Text
-> IO (Either WrapError (Maybe (TL.Text, TL.Text, TL.Text, [TL.Text])))
regexec re tx = do
mb <- asCStringLen tx $ wrapMatch 0 re
case mb of
Right Nothing -> return $ Right Nothing
Right (Just parts) -> return $ Right $ Just $ matchedParts parts
Left err -> return $ Left err
where
matchedParts [] = (TL.empty,TL.empty,tx,[])
matchedParts (mtchd@(start,stop):rst) =
( TL.take (fromIntegral start) tx
, getSub mtchd
, TL.drop (fromIntegral stop) tx
, map getSub rst
)
getSub (start,stop)
| start == unusedOffset = TL.empty
| otherwise = TL.take (fromIntegral $ stop-start) $
TL.drop (fromIntegral start) tx
unwrap :: (Show e) => Either e v -> IO v
unwrap x = case x of
Left e -> fail $ "Text.Regex.PCRE.Text died: " ++ show e
Right v -> return v
{-# INLINE asCString #-}
asCString :: TL.Text -> (CString->IO a) -> IO a
asCString = B.unsafeUseAsCString . T.encodeUtf8 . TL.toStrict
{-# INLINE asCStringLen #-}
asCStringLen :: TL.Text -> (CStringLen->IO a) -> IO a
asCStringLen s op = B.unsafeUseAsCStringLen (T.encodeUtf8 $ TL.toStrict s) checked
where
checked cs@(ptr,_)
| ptr == nullPtr = B.unsafeUseAsCStringLen myEmpty $ op . trim
| otherwise = op cs
trim (ptr,_) = (ptr,0)
myEmpty :: B.ByteString
myEmpty = B.pack [0]