{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Typst.Regex
( RE (..),
RegexMatch (..),
replaceRegex,
splitRegex,
makeLiteralRE,
makeRE,
match,
matchAll,
extract,
)
where
import qualified Data.Array as Array
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable (Typeable)
import Text.Regex.TDFA (Regex, extract)
import qualified Text.Regex.TDFA as TDFA
import qualified Text.Regex.TDFA.Text as TDFA
data RE = RE !Text !Regex
deriving (Typeable)
instance Eq RE where
RE Text
t1 Regex
_ == :: RE -> RE -> Bool
== RE Text
t2 Regex
_ = Text
t1 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
t2
instance Ord RE where
compare :: RE -> RE -> Ordering
compare (RE Text
t1 Regex
_) (RE Text
t2 Regex
_) = Text -> Text -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Text
t1 Text
t2
instance Show RE where
show :: RE -> String
show (RE Text
t Regex
_) = String
"/" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack Text
t String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"/"
data RegexMatch = RegexMatch
{ RegexMatch -> Int
matchStart :: Int,
RegexMatch -> Int
matchEnd :: Int,
RegexMatch -> Text
matchText :: Text,
RegexMatch -> [Text]
matchCaptures :: [Text]
}
deriving (RegexMatch -> RegexMatch -> Bool
(RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool) -> Eq RegexMatch
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RegexMatch -> RegexMatch -> Bool
== :: RegexMatch -> RegexMatch -> Bool
$c/= :: RegexMatch -> RegexMatch -> Bool
/= :: RegexMatch -> RegexMatch -> Bool
Eq, Eq RegexMatch
Eq RegexMatch =>
(RegexMatch -> RegexMatch -> Ordering)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> Bool)
-> (RegexMatch -> RegexMatch -> RegexMatch)
-> (RegexMatch -> RegexMatch -> RegexMatch)
-> Ord RegexMatch
RegexMatch -> RegexMatch -> Bool
RegexMatch -> RegexMatch -> Ordering
RegexMatch -> RegexMatch -> RegexMatch
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: RegexMatch -> RegexMatch -> Ordering
compare :: RegexMatch -> RegexMatch -> Ordering
$c< :: RegexMatch -> RegexMatch -> Bool
< :: RegexMatch -> RegexMatch -> Bool
$c<= :: RegexMatch -> RegexMatch -> Bool
<= :: RegexMatch -> RegexMatch -> Bool
$c> :: RegexMatch -> RegexMatch -> Bool
> :: RegexMatch -> RegexMatch -> Bool
$c>= :: RegexMatch -> RegexMatch -> Bool
>= :: RegexMatch -> RegexMatch -> Bool
$cmax :: RegexMatch -> RegexMatch -> RegexMatch
max :: RegexMatch -> RegexMatch -> RegexMatch
$cmin :: RegexMatch -> RegexMatch -> RegexMatch
min :: RegexMatch -> RegexMatch -> RegexMatch
Ord, Typeable)
replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex :: RE -> Maybe Int -> (RegexMatch -> Text) -> Text -> Text
replaceRegex (RE Text
_ Regex
re) Maybe Int
mbCount RegexMatch -> Text
replaceFn Text
strIn =
let matches :: [MatchArray]
matches = ([MatchArray] -> [MatchArray])
-> (Int -> [MatchArray] -> [MatchArray])
-> Maybe Int
-> [MatchArray]
-> [MatchArray]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [MatchArray] -> [MatchArray]
forall a. a -> a
id Int -> [MatchArray] -> [MatchArray]
forall a. Int -> [a] -> [a]
take Maybe Int
mbCount ([MatchArray] -> [MatchArray]) -> [MatchArray] -> [MatchArray]
forall a b. (a -> b) -> a -> b
$ Regex -> Text -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re Text
strIn
getCaptures :: Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m =
((Int, Int) -> Text) -> [(Int, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map
(\(Int
off, Int
len) -> (Int, Int) -> Text -> Text
forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn)
(Int -> [(Int, Int)] -> [(Int, Int)]
forall a. Int -> [a] -> [a]
drop Int
1 (Array i (Int, Int) -> [(Int, Int)]
forall i e. Array i e -> [e]
Array.elems Array i (Int, Int)
m))
go :: Int -> [Array i (Int, Int)] -> Text
go Int
i [] = Int -> Text -> Text
T.drop Int
i Text
strIn
go Int
i (Array i (Int, Int)
m : [Array i (Int, Int)]
rest) =
Int -> Text -> Text
forall a b. a -> b -> b
seq Int
i (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
let (Int
off, Int
len) = Array i (Int, Int)
m Array i (Int, Int) -> i -> (Int, Int)
forall i e. Ix i => Array i e -> i -> e
Array.! i
0
in ( if Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
i
then Int -> Int -> Text -> Text
slice Int
i (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text
strIn
else Text
forall a. Monoid a => a
mempty
)
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> RegexMatch -> Text
replaceFn
RegexMatch
{ matchStart :: Int
matchStart = Int
off,
matchEnd :: Int
matchEnd = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len,
matchText :: Text
matchText = (Int, Int) -> Text -> Text
forall source. Extract source => (Int, Int) -> source -> source
extract (Int
off, Int
len) Text
strIn,
matchCaptures :: [Text]
matchCaptures = Array i (Int, Int) -> [Text]
forall {i}. Array i (Int, Int) -> [Text]
getCaptures Array i (Int, Int)
m
}
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> [Array i (Int, Int)] -> Text
go (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) [Array i (Int, Int)]
rest
slice :: Int -> Int -> Text -> Text
slice Int
pos Int
len = Int -> Text -> Text
T.take Int
len (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
pos
in Int -> [MatchArray] -> Text
forall {i}. (Ix i, Num i) => Int -> [Array i (Int, Int)] -> Text
go Int
0 [MatchArray]
matches
makeRE :: MonadFail m => Text -> m RE
makeRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
t =
Text -> Regex -> RE
RE Text
t'
(Regex -> RE) -> m Regex -> m RE
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (String -> m Regex)
-> (Regex -> m Regex) -> Either String Regex -> m Regex
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
String -> m Regex
forall a. String -> m a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail
Regex -> m Regex
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
(CompOption -> ExecOption -> Text -> Either String Regex
TDFA.compile CompOption
compopts ExecOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
TDFA.defaultExecOpt Text
t')
where
(Bool
caseSensitive, Text
t') =
if Text
"(?i)" Text -> Text -> Bool
`T.isPrefixOf` Text
t
then (Bool
False, String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> ShowS
go Bool
False ShowS -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
4 Text
t)
else (Bool
True, String -> Text
T.pack (String -> Text) -> (Text -> String) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> ShowS
go Bool
False ShowS -> (Text -> String) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
t)
compopts :: CompOption
compopts = CompOption
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
TDFA.defaultCompOpt {TDFA.caseSensitive = caseSensitive}
go :: Bool -> ShowS
go Bool
_ [] = []
go Bool
True (Char
']' : String
cs) = Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
False String
cs
go Bool
False (Char
'[' : String
cs) = Char
'[' Char -> ShowS
forall a. a -> [a] -> [a]
:
case String
cs of
Char
'^':Char
']':String
ds -> Char
'^' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
Char
'^':Char
'\\':Char
']':String
ds -> Char
'^' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
Char
']':String
ds -> Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
Char
'\\':Char
']':String
ds -> Char
']' Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
True String
ds
String
_ -> Bool -> ShowS
go Bool
True String
cs
go Bool
False (Char
'?' : String
cs) = String
"{0,1}" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> ShowS
go Bool
False String
cs
go Bool
False (Char
'+' : String
cs) = String
"{1,}" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> ShowS
go Bool
False String
cs
go Bool
inCharClass (Char
'\\' : Char
c : String
cs)
= let f :: ShowS
f = if Bool
inCharClass
then ShowS
forall a. a -> a
id
else \String
x -> String
"[" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
x String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"]"
r :: String
r = case Char
c of
Char
'd' -> ShowS
f String
"[:digit:]"
Char
'D' -> ShowS
f String
"^[:digit:]"
Char
's' -> ShowS
f String
"[:space:]"
Char
'S' -> ShowS
f String
"^[:space:]"
Char
'w' -> ShowS
f String
"[:word:]"
Char
'W' -> ShowS
f String
"^[:word:]"
Char
_ -> [Char
'\\', Char
c]
in String
r String -> ShowS
forall a. [a] -> [a] -> [a]
++ Bool -> ShowS
go Bool
inCharClass String
cs
go Bool
inCharClass (Char
c : String
cs) = Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: Bool -> ShowS
go Bool
inCharClass String
cs
match :: TDFA.RegexContext Regex source target => RE -> source -> target
match :: forall source target.
RegexContext Regex source target =>
RE -> source -> target
match (RE Text
_ Regex
re) source
t = Regex -> source -> target
forall regex source target.
RegexContext regex source target =>
regex -> source -> target
TDFA.match Regex
re source
t
matchAll :: TDFA.RegexLike Regex source => RE -> source -> [TDFA.MatchArray]
matchAll :: forall source.
RegexLike Regex source =>
RE -> source -> [MatchArray]
matchAll (RE Text
_ Regex
re) source
t = Regex -> source -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
re source
t
makeLiteralRE :: MonadFail m => Text -> m RE
makeLiteralRE :: forall (m :: * -> *). MonadFail m => Text -> m RE
makeLiteralRE Text
t
| Text -> Bool
T.null Text
t = Text -> m RE
forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE Text
".{0,0}"
| Bool
otherwise = Text -> m RE
forall (m :: * -> *). MonadFail m => Text -> m RE
makeRE (Text -> m RE) -> Text -> m RE
forall a b. (a -> b) -> a -> b
$ (Text -> Char -> Text) -> Text -> Text -> Text
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl Text -> Char -> Text
go Text
forall a. Monoid a => a
mempty Text
t
where
go :: Text -> Char -> Text
go Text
acc Char
c = if Char -> Bool
isSpecial Char
c then Text
acc Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack [Char
'\\', Char
c] else Text -> Char -> Text
T.snoc Text
acc Char
c
isSpecial :: Char -> Bool
isSpecial Char
c = Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
".*?+(){}[]|\\^$" :: [Char])
splitRegex :: RE -> Text -> [Text]
splitRegex :: RE -> Text -> [Text]
splitRegex (RE Text
_ Regex
delim) Text
strIn =
let matches :: [(Int, Int)]
matches = (MatchArray -> (Int, Int)) -> [MatchArray] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
map (MatchArray -> Int -> (Int, Int)
forall i e. Ix i => Array i e -> i -> e
Array.! Int
0) (Regex -> Text -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
TDFA.matchAll Regex
delim Text
strIn)
go :: Int -> Text -> [(Int, Int)] -> [Text]
go Int
_i Text
str [] = Text
str Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: []
go Int
i Text
str ((Int
off, Int
len) : [(Int, Int)]
rest) =
let i' :: Int
i' = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
firstline :: Text
firstline = Int -> Text -> Text
T.take (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text
str
remainder :: Text
remainder = Int -> Text -> Text
T.drop (Int
i' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text
str
in Int -> [Text] -> [Text]
forall a b. a -> b -> b
seq Int
i' ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$
if Text -> Bool
T.null Text
remainder
then [Text
firstline, Text
""]
else Text
firstline Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Int -> Text -> [(Int, Int)] -> [Text]
go Int
i' Text
remainder [(Int, Int)]
rest
in Int -> Text -> [(Int, Int)] -> [Text]
go Int
0 Text
strIn [(Int, Int)]
matches