{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE FlexibleContexts    #-}
{-# LANGUAGE MagicHash           #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UnboxedTuples       #-}


{-|
Module      : Z.Data.Text.Search
Description : Searching text
Copyright   : (c) Dong Han, 2017-2018
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

-}

module Z.Data.Text.Search (
  -- * element-wise search
    elem, notElem
  -- * Searching by equality
  , findIndices
  , find, findR
  , findIndex
  , findIndexR
  , filter, partition
  ) where


import           Control.Monad.ST
import           Data.Word
import           Prelude                 hiding (elem, notElem, filter)
import           Z.Data.Array
import           Z.Data.Text.Base
import           Z.Data.Text.UTF8Codec
import qualified Z.Data.Vector.Base    as V

findIndices :: (Char -> Bool) -> Text -> [Int]
{-# INLINE findIndices #-}
findIndices :: (Char -> Bool) -> Text -> [Int]
findIndices Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int -> [Int]
forall a. Num a => a -> Int -> [a]
go Int
0 Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: a -> Int -> [a]
go !a
i !Int
p | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = []
             | Char -> Bool
f Char
x       = a
i a -> [a] -> [a]
forall a. a -> [a] -> [a]
: a -> Int -> [a]
go (a
ia -> a -> a
forall a. Num a => a -> a -> a
+a
1) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)
             | Bool
otherwise = a -> Int -> [a]
go (a
ia -> a -> a
forall a. Num a => a -> a -> a
+a
1) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)
        where (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
p

-- | /O(n)/ find the first char matching the predicate in a text
-- from left to right, if there isn't one, return the index point to the end of the byte slice.
find :: (Char -> Bool)
     -> Text
     -> (Int, Int, Maybe Char)  -- ^ (char index, byte index in slice, matching char)
{-# INLINE find #-}
find :: (Char -> Bool) -> Text -> (Int, Int, Maybe Char)
find Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int -> (Int, Int, Maybe Char)
forall t. Num t => t -> Int -> (t, Int, Maybe Char)
go Int
0 Int
s
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: t -> Int -> (t, Int, Maybe Char)
go !t
i !Int
j | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end  = (t
i, Int
j, Maybe Char
forall a. Maybe a
Nothing)
             | Bool
otherwise =
                let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
j
                in if Char -> Bool
f Char
x
                    then (t
i, Int
j, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x)
                    else t -> Int -> (t, Int, Maybe Char)
go (t
it -> t -> t
forall a. Num a => a -> a -> a
+t
1) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off)

-- | /O(n)/ find the first char matching the predicate in a text
-- from right to left, if there isn't one, return the index point to the start of the byte slice.
--
findR :: (Char -> Bool)
      -> Text
      -> (Int, Int, Maybe Char)  -- ^ (char index(counting backwards), byte index in slice, matching char)
{-# INLINE findR #-}
findR :: (Char -> Bool) -> Text -> (Int, Int, Maybe Char)
findR Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = Int -> Int -> (Int, Int, Maybe Char)
forall t. Num t => t -> Int -> (t, Int, Maybe Char)
go Int
0 (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
  where
    go :: t -> Int -> (t, Int, Maybe Char)
go !t
i !Int
j | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s     = (t
i, Int
j, Maybe Char
forall a. Maybe a
Nothing)
             | Bool
otherwise =
                let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeCharReverse PrimArray Word8
arr Int
j
                in if Char -> Bool
f Char
x
                    then (t
i, Int
j, Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x)
                    else t -> Int -> (t, Int, Maybe Char)
go (t
it -> t -> t
forall a. Num a => a -> a -> a
+t
1) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
off)

--------------------------------------------------------------------------------

-- | /O(n)/ find the index of the byte slice.
findIndex :: (Char -> Bool) -> Text -> Int
{-# INLINE findIndex #-}
findIndex :: (Char -> Bool) -> Text -> Int
findIndex Char -> Bool
f Text
t = case (Char -> Bool) -> Text -> (Int, Int, Maybe Char)
find Char -> Bool
f Text
t of (Int
_, Int
i, Maybe Char
_) -> Int
i

-- | /O(n)/ find the index of the byte slice in reverse order.
findIndexR ::  (Char -> Bool) -> Text -> Int
{-# INLINE findIndexR #-}
findIndexR :: (Char -> Bool) -> Text -> Int
findIndexR Char -> Bool
f Text
t = case (Char -> Bool) -> Text -> (Int, Int, Maybe Char)
findR Char -> Bool
f Text
t of (Int
_, Int
i, Maybe Char
_) -> Int
i

-- | /O(n)/ 'filter', applied to a predicate and a text,
-- returns a text containing those chars that satisfy the
-- predicate.
filter :: (Char -> Bool) -> Text -> Text
{-# INLINE filter #-}
filter :: (Char -> Bool) -> Text -> Text
filter Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l)) = PrimVector Word8 -> Text
Text (Int
-> (forall s. MArr (IArray PrimVector) s Word8 -> ST s Int)
-> PrimVector Word8
forall (v :: * -> *) a.
(Vec v a, HasCallStack) =>
Int -> (forall s. MArr (IArray v) s a -> ST s Int) -> v a
V.createN Int
l (Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go Int
s Int
0))
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s Int
    go :: Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go !Int
i !Int
j MutablePrimArray s Word8
marr
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end = Int -> ST s Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
j
        | Bool
otherwise =
            let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
i
            in if Char -> Bool
f Char
x
                then do
                    Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
copyChar Int
off MutablePrimArray s Word8
marr Int
j PrimArray Word8
arr Int
i
                    Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) MutablePrimArray s Word8
marr
                else Int -> Int -> MutablePrimArray s Word8 -> ST s Int
forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) Int
j MutablePrimArray s Word8
marr

-- | /O(n)/ The 'partition' function takes a predicate, a text, returns
-- a pair of text with codepoints which do and do not satisfy the
-- predicate, respectively; i.e.,
--
-- > partition p txt == (filter p txt, filter (not . p) txt)
partition :: (Char -> Bool) -> Text -> (Text, Text)
{-# INLINE partition #-}
partition :: (Char -> Bool) -> Text -> (Text, Text)
partition Char -> Bool
f (Text (V.PrimVector PrimArray Word8
arr Int
s Int
l))
    | Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    = (Text
empty, Text
empty)
    | Bool
otherwise = let !(PrimVector Word8
bs1, PrimVector Word8
bs2) = Int
-> Int
-> (forall s.
    MArr (IArray PrimVector) s Word8
    -> MArr (IArray PrimVector) s Word8 -> ST s (Int, Int))
-> (PrimVector Word8, PrimVector Word8)
forall (v :: * -> *) a (u :: * -> *) b.
(Vec v a, Vec u b, HasCallStack) =>
Int
-> Int
-> (forall s.
    MArr (IArray v) s a -> MArr (IArray u) s b -> ST s (Int, Int))
-> (v a, u b)
V.createN2 Int
l Int
l (Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go Int
0 Int
0 Int
s) in (PrimVector Word8 -> Text
Text PrimVector Word8
bs1, PrimVector Word8 -> Text
Text PrimVector Word8
bs2)
  where
    !end :: Int
end = Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    go :: Int -> Int -> Int -> MutablePrimArray s Word8 -> MutablePrimArray s Word8 -> ST s (Int, Int)
    go :: Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go !Int
i !Int
j !Int
p !MutablePrimArray s Word8
mba0 !MutablePrimArray s Word8
mba1
        | Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end   = (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
i, Int
j)
        | Bool
otherwise =
            let (# Char
x, Int
off #) = PrimArray Word8 -> Int -> (# Char, Int #)
decodeChar PrimArray Word8
arr Int
p
            in if Char -> Bool
f Char
x
                then Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
copyChar Int
off MutablePrimArray s Word8
mba0 Int
i PrimArray Word8
arr Int
p ST s () -> ST s (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) Int
j (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) MutablePrimArray s Word8
mba0 MutablePrimArray s Word8
mba1
                else Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
forall s.
Int
-> MutablePrimArray s Word8
-> Int
-> PrimArray Word8
-> Int
-> ST s ()
copyChar Int
off MutablePrimArray s Word8
mba1 Int
j PrimArray Word8
arr Int
p ST s () -> ST s (Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
forall s.
Int
-> Int
-> Int
-> MutablePrimArray s Word8
-> MutablePrimArray s Word8
-> ST s (Int, Int)
go Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
off) MutablePrimArray s Word8
mba0 MutablePrimArray s Word8
mba1

--------------------------------------------------------------------------------
-- Searching by equality

-- | /O(n)/ 'elem' test if given char is in given text.
elem :: Char -> Text -> Bool
{-# INLINE elem #-}
elem :: Char -> Text -> Bool
elem Char
x Text
t = case (Char -> Bool) -> Text -> (Int, Int, Maybe Char)
find (Char
xChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) Text
t of (Int
_,Int
_,Maybe Char
Nothing) -> Bool
False
                                (Int, Int, Maybe Char)
_             -> Bool
True

-- | /O(n)/ @not . elem@
notElem ::  Char -> Text -> Bool
{-# INLINE notElem #-}
notElem :: Char -> Text -> Bool
notElem Char
x = Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text -> Bool
elem Char
x