{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}

-- |
-- Module      : Data.Text.Internal.Search
-- Copyright   : (c) Bryan O'Sullivan 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Fast substring search for 'Text', based on work by Boyer, Moore,
-- Horspool, Sunday, and Lundh.
--
-- References:
--
-- * R. S. Boyer, J. S. Moore: A Fast String Searching Algorithm.
--   Communications of the ACM, 20, 10, 762-772 (1977)
--
-- * R. N. Horspool: Practical Fast Searching in Strings.  Software -
--   Practice and Experience 10, 501-506 (1980)
--
-- * D. M. Sunday: A Very Fast Substring Search Algorithm.
--   Communications of the ACM, 33, 8, 132-142 (1990)
--
-- * F. Lundh: The Fast Search Algorithm.
--   <http://effbot.org/zone/stringlib.htm> (2006)

module Data.Text.Internal.Search
    (
      indices
    ) where

import qualified Data.Text.Array as A
import Data.Word (Word64)
import Data.Text.Internal (Text(..))
import Data.Bits ((.|.), (.&.))
import Data.Text.Internal.Unsafe.Shift (shiftL)

data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int

-- | /O(n+m)/ Find the offsets of all non-overlapping indices of
-- @needle@ within @haystack@.  The offsets returned represent
-- uncorrected indices in the low-level \"needle\" array, to which its
-- offset must be added.
--
-- In (unlikely) bad cases, this algorithm's complexity degrades
-- towards /O(n*m)/.
indices :: Text                -- ^ Substring to search for (@needle@)
        -> Text                -- ^ Text to search in (@haystack@)
        -> [Int]
indices :: Text -> Text -> [Int]
indices _needle :: Text
_needle@(Text Array
narr Int
noff Int
nlen) _haystack :: Text
_haystack@(Text Array
harr Int
hoff Int
hlen)
    | Int
nlen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1              = Word16 -> [Int]
scanOne (Int -> Word16
nindex Int
0)
    | Int
nlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 Bool -> Bool -> Bool
|| Int
ldiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = []
    | Bool
otherwise              = Int -> [Int]
scan Int
0
  where
    ldiff :: Int
ldiff    = Int
hlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nlen
    nlast :: Int
nlast    = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
    z :: Word16
z        = Int -> Word16
nindex Int
nlast
    nindex :: Int -> Word16
nindex Int
k = Array -> Int -> Word16
A.unsafeIndex Array
narr (Int
noffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
k)
    hindex :: Int -> Word16
hindex Int
k = Array -> Int -> Word16
A.unsafeIndex Array
harr (Int
hoffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
k)
    hindex' :: Int -> Word16
hindex' Int
k | Int
k Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
hlen  = Word16
0
              | Bool
otherwise = Array -> Int -> Word16
A.unsafeIndex Array
harr (Int
hoffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
k)
    buildTable :: Int -> Word64 -> Int -> T
buildTable !Int
i !Word64
msk !Int
skp
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlast           = (Word64
msk Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word16 -> Word64
forall a a. (UnsafeShift a, Integral a, Num a) => a -> a
swizzle Word16
z) Word64 -> Int -> T
:* Int
skp
        | Bool
otherwise            = Int -> Word64 -> Int -> T
buildTable (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Word64
msk Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. Word16 -> Word64
forall a a. (UnsafeShift a, Integral a, Num a) => a -> a
swizzle Word16
c) Int
skp'
        where c :: Word16
c                = Int -> Word16
nindex Int
i
              skp' :: Int
skp' | Word16
c Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
z    = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2
                   | Bool
otherwise = Int
skp
    swizzle :: a -> a
swizzle a
k = a
1 a -> Int -> a
forall a. UnsafeShift a => a -> Int -> a
`shiftL` (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
k Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3f)
    scan :: Int -> [Int]
scan !Int
i
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
ldiff                  = []
        | Word16
c Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
z Bool -> Bool -> Bool
&& Int -> Bool
candidateMatch Int
0 = Int
i Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
scan (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlen)
        | Bool
otherwise                  = Int -> [Int]
scan (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
delta)
        where c :: Word16
c = Int -> Word16
hindex (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nlast)
              candidateMatch :: Int -> Bool
candidateMatch !Int
j
                    | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nlast               = Bool
True
                    | Int -> Word16
hindex (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
/= Int -> Word16
nindex Int
j = Bool
False
                    | Bool
otherwise                = Int -> Bool
candidateMatch (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
              delta :: Int
delta | Bool
nextInPattern = Int
nlen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
                    | Word16
c Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
z        = Int
skip Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
                    | Bool
otherwise     = Int
1
                where nextInPattern :: Bool
nextInPattern = Word64
mask Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word16 -> Word64
forall a a. (UnsafeShift a, Integral a, Num a) => a -> a
swizzle (Int -> Word16
hindex' (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nlen)) Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0
              !(Word64
mask :* Int
skip)       = Int -> Word64 -> Int -> T
buildTable Int
0 Word64
0 (Int
nlenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2)
    scanOne :: Word16 -> [Int]
scanOne Word16
c = Int -> [Int]
loop Int
0
        where loop :: Int -> [Int]
loop !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
hlen     = []
                      | Int -> Word16
hindex Int
i Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
c = Int
i Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                      | Bool
otherwise     = Int -> [Int]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
{-# INLINE indices #-}