{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.Text.AhoCorasick.Searcher
( Searcher
, build
, buildWithValues
, needles
, numNeedles
, automaton
, caseSensitivity
, containsAny
, setSearcherCaseSensitivity
)
where
import Control.DeepSeq (NFData)
import Data.Hashable (Hashable (hashWithSalt), Hashed, hashed, unhashed)
import Data.Semigroup (Semigroup, (<>))
import Data.Text (Text)
import GHC.Generics (Generic)
#if defined(HAS_AESON)
import Data.Aeson ((.:), (.=))
import qualified Data.Aeson as AE
#endif
import Data.Text.AhoCorasick.Automaton (CaseSensitivity (..))
import qualified Data.Text.AhoCorasick.Automaton as Aho
import qualified Data.Text.Utf16 as Utf16
-- | A set of needles bundled with the automaton built from them.
--
-- Values are normally created with 'buildWithValues' (or 'build'), which
-- fills all four fields from a case sensitivity and a needle list.
data Searcher v = Searcher
  { -- | Case-sensitivity setting; 'containsAny' inspects it to decide which
    -- automaton runner to call.
    searcherCaseSensitive :: CaseSensitivity
  , -- | The needles and their associated values, wrapped in 'Hashed'.
    searcherNeedles :: Hashed [(Text, v)]
  , -- | Number of needles; 'buildWithValues' stores the length of the
    -- needle list here.
    searcherNumNeedles :: Int
  , -- | The automaton that 'buildWithValues' obtains from @Aho.build@.
    searcherAutomaton :: Aho.AcMachine v
  }
  deriving Generic
#if defined(HAS_AESON)
-- | Serialises a searcher as an object with the keys @needles@ and
-- @caseSensitivity@. The automaton itself is not written out.
instance AE.ToJSON v => AE.ToJSON (Searcher v) where
  toJSON searcher =
    let needlesPair = "needles" .= needles searcher
        casePair = "caseSensitivity" .= caseSensitivity searcher
    in AE.object [needlesPair, casePair]
-- | Reads the @caseSensitivity@ and @needles@ keys (in that order) from a
-- JSON object and passes them to 'buildWithValues', so the automaton is
-- reconstructed from the parsed needles.
instance (Hashable v, AE.FromJSON v) => AE.FromJSON (Searcher v) where
  parseJSON = AE.withObject "Searcher" $ \obj -> do
    sensitivity <- obj .: "caseSensitivity"
    parsedNeedles <- obj .: "needles"
    pure (buildWithValues sensitivity parsedNeedles)
#endif
-- | Renders every searcher as the same fixed placeholder string; none of the
-- fields is inspected or shown.
instance Show (Searcher v) where
  show = const "Searcher _ _ _"
-- | The hash of a searcher is derived from its 'Hashed' needle list only;
-- the case sensitivity, needle count and automaton do not contribute.
instance Hashable v => Hashable (Searcher v) where
  hashWithSalt salt = hashWithSalt salt . searcherNeedles
  {-# INLINE hashWithSalt #-}
-- | Two searchers are equal when their needle counts, needle lists and case
-- sensitivities agree, checked in that order. The automata are not compared.
instance Eq v => Eq (Searcher v) where
  lhs == rhs =
    searcherNumNeedles lhs == searcherNumNeedles rhs
      && searcherNeedles lhs == searcherNeedles rhs
      && searcherCaseSensitive lhs == searcherCaseSensitive rhs
  {-# INLINE (==) #-}
-- | 'NFData' instance for 'Searcher'. No methods are defined here, so the
-- class defaults are used.
-- NOTE(review): presumably this is the 'Generic'-based default, which would
-- rely on the @deriving (Generic)@ clause of 'Searcher' -- confirm.
instance NFData v => NFData (Searcher v)
-- | Combines two value-less searchers by concatenating their needle lists
-- (left operand first) and building a new searcher from the result.
--
-- Both operands must have the same case sensitivity; otherwise this calls
-- 'error'.
instance Semigroup (Searcher ()) where
  lhs <> rhs =
    if caseSensitivity lhs == caseSensitivity rhs
      then buildWithValues (caseSensitivity lhs) (needles lhs <> needles rhs)
      else error "Combining searchers of different case sensitivity"
  {-# INLINE (<>) #-}
-- | Builds a searcher that carries no per-needle payload: every needle is
-- paired with @()@ and the result is handed to 'buildWithValues'.
build :: CaseSensitivity -> [Text] -> Searcher ()
build case_ texts = buildWithValues case_ [(needle, ()) | needle <- texts]
-- | Builds a searcher from a case sensitivity and a list of needles with
-- associated values.
--
-- The needle list is stored as given (wrapped with 'hashed'), its length is
-- recorded, and the automaton is built from the needles after each one has
-- been passed through @Utf16.unpackUtf16@. The needles themselves are not
-- altered according to the case sensitivity here.
buildWithValues :: Hashable v => CaseSensitivity -> [(Text, v)] -> Searcher v
{-# INLINABLE buildWithValues #-}
buildWithValues case_ ns =
  Searcher
    { searcherCaseSensitive = case_
    , searcherNeedles = hashed ns
    , searcherNumNeedles = length ns
    , searcherAutomaton = Aho.build (map toCodeUnits ns)
    }
  where
    -- Convert the needle text to the representation @Aho.build@ consumes,
    -- leaving the attached value untouched.
    toCodeUnits (needle, payload) = (Utf16.unpackUtf16 needle, payload)
-- | The needles (with their values) stored in the searcher, unwrapped from
-- their 'Hashed' container.
needles :: Searcher v -> [(Text, v)]
needles searcher = unhashed (searcherNeedles searcher)
-- | The needle count stored in the searcher (read from the field, not
-- recomputed from the needle list).
numNeedles :: Searcher v -> Int
numNeedles searcher = searcherNumNeedles searcher
-- | The automaton stored in the searcher.
automaton :: Searcher v -> Aho.AcMachine v
automaton Searcher{searcherAutomaton = machine} = machine
-- | The case-sensitivity setting stored in the searcher.
caseSensitivity :: Searcher v -> CaseSensitivity
caseSensitivity Searcher{searcherCaseSensitive = sensitivity} = sensitivity
-- | Returns a searcher with the case-sensitivity field replaced.
--
-- Only that one field changes: the needle list, the needle count and the
-- automaton are carried over as they are, and the automaton is not rebuilt.
-- NOTE(review): presumably callers switching to 'IgnoreCase' must make sure
-- the needles already have the form the case-insensitive runner expects --
-- confirm against the automaton module.
setSearcherCaseSensitivity :: CaseSensitivity -> Searcher v -> Searcher v
setSearcherCaseSensitivity case_ (Searcher _ storedNeedles count machine) =
  Searcher case_ storedNeedles count machine
-- | Checks the text against the searcher's automaton and stops as soon as a
-- match is reported.
--
-- The run starts with the accumulator 'False'; the callback ignores its
-- arguments and answers the first reported match with @Aho.Done True@.
-- The searcher's case sensitivity selects the runner: @Aho.runText@ for
-- 'CaseSensitive' and @Aho.runLower@ for 'IgnoreCase'. Both arguments are
-- forced to weak head normal form by the bang patterns.
{-# NOINLINE containsAny #-}
containsAny :: Searcher () -> Text -> Bool
containsAny !searcher !text =
  case caseSensitivity searcher of
    CaseSensitive -> Aho.runText noMatchYet stopAtFirstMatch machine text
    IgnoreCase -> Aho.runLower noMatchYet stopAtFirstMatch machine text
  where
    machine = automaton searcher
    -- Initial accumulator handed to the runner.
    noMatchYet = False
    -- Any match at all settles the question, so end the run immediately.
    stopAtFirstMatch _acc _match = Aho.Done True