{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Typst.Regex
( RE (..),
RegexMatch (..),
replaceRegex,
splitRegex,
makeLiteralRE,
makeRE,
match,
matchAll,
extract,
)
where
import qualified Data.Array as Array
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable (Typeable)
import Text.Regex.TDFA (Regex, extract)
import qualified Text.Regex.TDFA as TDFA
import qualified Text.Regex.TDFA.Text as TDFA
data RE = RE !Text !Regex
deriving (Typeable)
instance Eq RE where
RE Text
t1 Regex
_ == :: RE -> RE -> Bool
== RE Text
t2 Regex
_ = Text
t1 forall a. Eq a => a -> a -> Bool
== Text
t2
instance Ord RE where
compare :: RE -> RE -> Ordering
compare (RE Text
t1 Regex
_) (RE Text
t2 Regex
_) = forall a. Ord a => a -> a -> Ordering
compare Text
t1 Text
t2
instance Show RE where
show :: RE -> String
show (RE Text
t Regex
_) = String
"/" forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack Text
t forall a. Semigroup a => a -> a -> a
<> String
"/"
data RegexMatch = RegexMatch
{ RegexMatch -> Int
matchStart :: Int,
RegexMatch -> Int
matchEnd :: Int,
RegexMatch -> Text
matchText :: Text,
RegexMatch -> [Text]
matchCaptures :: [Text]
}
deriving (RegexMatch -> RegexMatch -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RegexMatch -> RegexMatch -> Bool
$c/= :: RegexMatch -> RegexMatch -> Bool
== :: RegexMatch -> RegexMatch -> Bool
$c== :: RegexMatch -> RegexMatch -> Bool
Eq, Eq RegexMatch
RegexMatch -> RegexMatch -> Bool
RegexMatch -> RegexMatch -> Ordering
RegexMatch -> RegexMatch -> RegexMatch
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: RegexMatch -> RegexMatch -> RegexMatch
$cmin :: RegexMatch -> RegexMatch -> RegexMatch
max :: RegexMatch -> RegexMatch -> RegexMatch
$cmax :: RegexMatch -> RegexMatch -> RegexMatch
>= :: RegexMatch -> RegexMatch -> Bool
$c>= :: RegexMatch -> RegexMatch -> Bool
> :: RegexMatch -> RegexMatch -> Bool
$c> :: RegexMatch -> RegexMatch -> Bool
<= :: RegexMatch -> RegexMatch -> Bool
$c<= :: RegexMatch -> RegexMatch -> Bool
< :: RegexMatch -> RegexMatch -> Bool
$c< :: RegexMatch -> RegexMatch -> Bool
compare :: RegexMatch -> RegexMatch -> Ordering
$ccompare :: RegexMatch -> RegexMatch -> Ordering
Ord, Typeable)
replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex (RE Text
_ Regex
re) Maybe Int
mbCount RegexMatch -> Text
replaceFn Text
strIn =
let matches :: [MatchArray]
matches = forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall a. a -> a
id forall a. Int -> [a] -> [a]
take Maybe Int
mbCount forall a b. (a -> b) -> a -> b
$ forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re Text
strIn
getCaptures :: Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m =
forall a b. (a -> b) -> [a] -> [b]
map
(\(Int
off, Int
len) -> forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn)
(forall a. Int -> [a] -> [a]
drop Int
1 (forall i e. Array i e -> [e]
Array.elems Array i (Int, Int)
m))
go :: Int -> [Array i (Int, Int)] -> Text
go Int
i [] = Int -> Text -> Text
T.drop Int
i Text
strIn
go Int
i (Array i (Int, Int)
m : [Array i (Int, Int)]
rest) =
seq :: forall a b. a -> b -> b
seq Int
i forall a b. (a -> b) -> a -> b
$
let (Int
off, Int
len) = Array i (Int, Int)
m forall i e. Ix i => Array i e -> i -> e
Array.! i
0
in ( if Int
off forall a. Ord a => a -> a -> Bool
> Int
i
then Int -> Int -> Text -> Text
slice Int
i (Int
off forall a. Num a => a -> a -> a
- Int
i) Text
strIn
else forall a. Monoid a => a
mempty
)
forall a. Semigroup a => a -> a -> a
<> RegexMatch -> Text
replaceFn
RegexMatch
{ matchStart :: Int
matchStart = Int
off,
matchEnd :: Int
matchEnd = Int
off forall a. Num a => a -> a -> a
+ Int
len,
matchText :: Text
matchText = forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn,
matchCaptures :: [Text]
matchCaptures = forall {i}. Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m
}
forall a. Semigroup a => a -> a -> a
<> Int -> [Array i (Int, Int)] -> Text
go (Int
off forall a. Num a => a -> a -> a
+ Int
len) [Array i (Int, Int)]
rest
slice :: Int -> Int -> Text -> Text
slice Int
pos Int
len = Int -> Text -> Text
T.take Int
len forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
pos
in forall {i}. (Ix i, Num i) => Int -> [Array i (Int, Int)] -> Text
go Int
0 [MatchArray]
matches
makeRE :: MonadFail m => Text -> m RE
makeRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
t =
Text -> Regex -> RE
RE Text
t'
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
forall (m :: * -> *) a. MonadFail m => String -> m a
fail
forall (f :: * -> *) a. Applicative f => a -> f a
pure
(CompOption -> ExecOption -> Text -> Either String Regex
TDFA.compile CompOption
compopts forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
TDFA.defaultExecOpt Text
t')
where
(Bool
caseSensitive, Text
t') =
if Text
"(?i)" Text -> Text -> Bool
`T.isPrefixOf` Text
t
then (Bool
False, String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
4 Text
t)
else (Bool
True, String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack forall a b. (a -> b) -> a -> b
$ Text
t)
compopts :: CompOption
compopts = forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
TDFA.defaultCompOpt {caseSensitive :: Bool
TDFA.caseSensitive = Bool
caseSensitive}
go :: ShowS
go [] = []
go (Char
'?' : String
cs) = String
"{0,1}" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
go (Char
'+' : String
cs) = String
"{1,}" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
go (Char
'\\' : Char
c : String
cs)
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'd' = String
"[[:digit:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'D' = String
"[^[:digit:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
| Char
c forall a. Eq a => a -> a -> Bool
== Char
's' = String
"[[:space:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'S' = String
"[^[:space:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'w' = String
"[[:word:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
| Char
c forall a. Eq a => a -> a -> Bool
== Char
'W' = String
"[^[:word:]]" forall a. [a] -> [a] -> [a]
++ ShowS
go String
cs
| Bool
otherwise = Char
'\\' forall a. a -> [a] -> [a]
: Char
c forall a. a -> [a] -> [a]
: ShowS
go String
cs
go (Char
c : String
cs) = Char
c forall a. a -> [a] -> [a]
: ShowS
go String
cs
match :: TDFA.RegexContext Regex source target => RE -> source -> target
match :: forall source target.
RegexContext Regex source target =>
RE -> source -> target
match (RE Text
_ Regex
re) source
t = forall regex source target.
RegexContext regex source target =>
regex -> source -> target
TDFA.match Regex
re source
t
matchAll :: TDFA.RegexLike Regex source => RE -> source -> [TDFA.MatchArray]
matchAll :: forall source.
RegexLike Regex source =>
RE -> source -> [MatchArray]
matchAll (RE Text
_ Regex
re) source
t = forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re source
t
makeLiteralRE :: MonadFail m => Text -> m RE
makeLiteralRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeLiteralRE Text
t
| Text -> Bool
T.null Text
t = forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
".{0,0}"
| Bool
otherwise = forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE forall a b. (a -> b) -> a -> b
$ forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl Text -> Char -> Text
go forall a. Monoid a => a
mempty Text
t
where
go :: Text -> Char -> Text
go Text
acc Char
c = if Char -> Bool
isSpecial Char
c then Text
acc forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack [Char
'\\', Char
c] else Text -> Char -> Text
T.snoc Text
acc Char
c
isSpecial :: Char -> Bool
isSpecial Char
c = Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
".*?+{}[]\\" :: [Char])
splitRegex :: RE -> Text -> [Text]
splitRegex :: RE -> Text -> [Text]
splitRegex (RE Text
_ Regex
delim) Text
strIn =
let matches :: [(Int, Int)]
matches = forall a b. (a -> b) -> [a] -> [b]
map (forall i e. Ix i => Array i e -> i -> e
Array.! Int
0) (forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
delim Text
strIn)
go :: Int -> Text -> [(Int, Int)] -> [Text]
go Int
_i Text
str [] = Text
str forall a. a -> [a] -> [a]
: []
go Int
i Text
str ((Int
off, Int
len) : [(Int, Int)]
rest) =
let i' :: Int
i' = Int
off forall a. Num a => a -> a -> a
+ Int
len
firstline :: Text
firstline = Int -> Text -> Text
T.take (Int
off forall a. Num a => a -> a -> a
- Int
i) Text
str
remainder :: Text
remainder = Int -> Text -> Text
T.drop (Int
i' forall a. Num a => a -> a -> a
- Int
i) Text
str
in seq :: forall a b. a -> b -> b
seq Int
i' forall a b. (a -> b) -> a -> b
$
if Text -> Bool
T.null Text
remainder
then [Text
firstline, Text
""]
else Text
firstline forall a. a -> [a] -> [a]
: Int -> Text -> [(Int, Int)] -> [Text]
go Int
i' Text
remainder [(Int, Int)]
rest
in Int -> Text -> [(Int, Int)] -> [Text]
go Int
0 Text
strIn [(Int, Int)]
matches