{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
module Data.Text.AhoCorasick.Splitter
( Splitter
, build
, automaton
, separator
, split
, splitIgnoreCase
, splitReverse
, splitReverseIgnoreCase
) where
import Control.DeepSeq (NFData (..))
import Data.Function (on)
import Data.Hashable (Hashable (..))
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text.AhoCorasick.Automaton (AcMachine)
import qualified Data.Text.AhoCorasick.Automaton as Aho
import qualified Data.Text.Utf16 as Utf16
data Splitter =
Splitter
{ splitterAutomaton :: AcMachine ()
, splitterSeparator :: Text
}
#if defined(HAS_AESON)
instance AE.ToJSON Splitter where
toJSON = AE.toJSON . separator
instance AE.FromJSON Splitter where
parseJSON v = build <$> AE.parseJSON v
#endif
{-# INLINE build #-}
build :: Text -> Splitter
build sep =
let !auto = Aho.build [(Utf16.unpackUtf16 sep, ())] in
Splitter auto sep
{-# INLINE automaton #-}
automaton :: Splitter -> AcMachine ()
automaton = splitterAutomaton
{-# INLINE separator #-}
separator :: Splitter -> Text
separator = splitterSeparator
{-# INLINE split #-}
split :: Splitter -> Text -> NonEmpty Text
split = (NonEmpty.reverse .) . splitReverse
{-# INLINE splitIgnoreCase #-}
splitIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitIgnoreCase = (NonEmpty.reverse .) . splitReverseIgnoreCase
{-# INLINE splitReverse #-}
splitReverse :: Splitter -> Text -> NonEmpty Text
splitReverse s t =
finalizeAccum $
Aho.runText
(zeroAccum (separator s) t)
stepAccum
(automaton s)
t
{-# INLINE splitReverseIgnoreCase #-}
splitReverseIgnoreCase :: Splitter -> Text -> NonEmpty Text
splitReverseIgnoreCase s t =
finalizeAccum $
Aho.runLower
(zeroAccum (separator s) t)
stepAccum
(automaton s)
t
data Accum =
Accum
{ _accumSepLen :: !Aho.CodeUnitIndex
, _accumHaystack :: !Text
, accumResult :: ![Text]
, accumPrevEnd :: !Aho.CodeUnitIndex
}
{-# INLINE finalizeAccum #-}
finalizeAccum :: Accum -> NonEmpty Text
finalizeAccum (Accum _ hay res prevEnd) =
let !str = Utf16.unsafeSliceUtf16 prevEnd (Utf16.lengthUtf16 hay - prevEnd) hay in
str :| res
{-# INLINE zeroAccum #-}
zeroAccum :: Text -> Text -> Accum
zeroAccum sep hay = Accum (Utf16.lengthUtf16 sep) hay [] 0
{-# INLINE stepAccum #-}
stepAccum :: Accum -> Aho.Match v -> Aho.Next Accum
stepAccum acc@(Accum sepLen hay res prevEnd) (Aho.Match sepEnd _)
| sepEnd - sepLen < prevEnd =
Aho.Step acc
| otherwise =
let !str = Utf16.unsafeSliceUtf16 prevEnd (sepEnd - sepLen - prevEnd) hay in
Aho.Step acc { accumResult = str : res, accumPrevEnd = sepEnd }
instance Eq Splitter where
{-# INLINE (==) #-}
(==) = (==) `on` separator
instance Ord Splitter where
{-# INLINE compare #-}
compare = compare `on` separator
instance Hashable Splitter where
{-# INLINE hashWithSalt #-}
hashWithSalt salt searcher =
salt `hashWithSalt` separator searcher
instance NFData Splitter where
{-# INLINE rnf #-}
rnf (Splitter searcher sepLength) =
rnf searcher `seq`
rnf sepLength
instance Show Splitter where
showsPrec p splitter =
showParen (p > 10) $
showString "build " .
showsPrec 11 (separator splitter)