{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}
module Data.TextArray.Unboxed.Internal where
import Control.DeepSeq
import Control.Monad
import Control.Monad.ST
import qualified Data.ByteString.Short as SBS
import Data.Hashable (Hashable (..))
import qualified Data.List as List
import Data.Semigroup
import Data.Text.Short (ShortText)
import qualified Data.Text.Short as TS
import qualified GHC.Exts as GHC
import Prelude hiding (elem, length, null)
import Internal
newtype TextArray = TA# BA
ta2ba :: TextArray -> BA
ta2ba (TA# x) = x
instance Eq TextArray where
(TA# x) == (TA# y) = ba2sbs x == ba2sbs y
instance Ord TextArray where
compare x y = compare (toList x) (toList y)
instance Show TextArray where
showsPrec p ta = showsPrec p (toList ta)
show ta = show (toList ta)
instance Read TextArray where
readsPrec p = map (\(x,s) -> (fromList x,s)) . readsPrec p
instance GHC.IsList TextArray where
type Item TextArray = ShortText
fromList = fromList
toList = toList
instance NFData TextArray where
rnf (TA# (BA# !_)) = ()
instance Semigroup TextArray where
(<>) = append
instance Monoid TextArray where
mempty = empty
mappend = (<>)
instance Hashable TextArray where
hashWithSalt salt (TA# ba) = hashWithSalt salt ba
empty :: TextArray
empty = TA# $ createBA wordSize $ \mba -> writeIntArray mba 0 0
singleton :: ShortText -> TextArray
singleton t = TA# $ createBA totalSize $ \mba -> do
writeIntArray mba 0 1
writeIntArray mba 1 dataOfs
copyByteArray (st2ba t) 0 mba dataOfs dataSize
where
dataOfs = wordSize * 2
dataSize = sizeOfByteArray (st2ba t)
totalSize = dataOfs + dataSize
null :: TextArray -> Bool
null = (==0) . length
length :: TextArray -> Int
length (TA# ba) = indexIntArray ba 0
(!?) :: TextArray -> Int -> Maybe ShortText
ta@(TA# ba) !? i = case indexOfsLen ta i of
Just (IdxOfsLen _ ofs n) -> Just $! ba2st (sliceBA ba ofs n)
Nothing -> Nothing
fromList :: [ShortText] -> TextArray
fromList [] = empty
fromList xs = TA# $ createBA totalSize $ \mba -> do
writeIntArray mba 0 n
writeOfsTab mba 1 dataOfs xs
pure ()
where
writeOfsTab :: MBA s -> Int -> Int -> [ShortText] -> ST s ()
writeOfsTab _ !_ !_ [] = pure ()
writeOfsTab mba !j !ofs (t:ts) = do
let y = SBS.length (TS.toShortByteString t)
writeIntArray mba j ofs
copyByteArray (st2ba t) 0 mba ofs (sizeOfByteArray (st2ba t))
writeOfsTab mba (j+1) (ofs+y) ts
(dataSize,n) = sumLen $ map (SBS.length . TS.toShortByteString) xs
dataOfs = wordSize * (1+n)
totalSize = dataOfs + dataSize
append :: TextArray -> TextArray -> TextArray
append a b
| null a = b
| null b = a
append ta1@(TA# ba1) ta2@(TA# ba2) = TA# $ createBA totalSize $ \mba -> do
writeIntArray mba 0 n
forM_ [1..n1] $ \j -> writeIntArray mba j (ofs1fixup + indexIntArray ba1 j)
forM_ [1..n2] $ \j -> writeIntArray mba (n1+j) (ofs2fixup + indexIntArray ba2 j)
copyByteArray ba1 dataOfs1 mba dataOfs1' dataSz1
copyByteArray ba2 dataOfs2 mba dataOfs2' dataSz2
pure ()
where
n = n1+n2
n1 = length ta1
n2 = length ta2
ofs1fixup = n2 * wordSize
ofs2fixup = sz1 - wordSize
sz1 = sizeOfByteArray ba1
sz2 = sizeOfByteArray ba2
dataOfs1 = (1+n1) * wordSize
dataOfs2 = (1+n2) * wordSize
dataSz1 = sz1 - dataOfs1
dataSz2 = sz2 - dataOfs2
dataOfs1' = (1+n) * wordSize
dataOfs2' = dataOfs1'+dataSz1
totalSize = sz1+sz2-wordSize
toList :: TextArray -> [ShortText]
toList ta = map (\(IdxOfsLen _ ofs n) -> ba2st (sliceBA (ta2ba ta) ofs n)) (listOfsLen ta)
elem :: ShortText -> TextArray -> Bool
elem needle ta = not (List.null (elemIndices needle ta))
elemIndices :: ShortText -> TextArray -> [Int]
elemIndices needle ta = [ i | IdxOfsLen i ofs n <- listOfsLen ta, cmp ofs n ]
where
!ba2 = st2ba needle
!ba2sz = sizeOfByteArray ba2
cmp !ofs1 !n = equalByteArray ba2 0 ba2sz (ta2ba ta) ofs1 n
findAllOrd :: (Ordering -> Bool) -> ShortText -> TextArray -> [ShortText]
findAllOrd ordFilter needle ta = [ mkst ofs n | IdxOfsLen _ ofs n <- listOfsLen ta, ordFilter (cmp ofs n) ]
where
!ba2 = st2ba needle
!ba2sz = sizeOfByteArray ba2
mkst !ofs1 !n = ba2st (sliceBA (ta2ba ta) ofs1 n)
cmp !ofs1 !n = compareByteArray ba2 0 ba2sz (ta2ba ta) ofs1 n
findIndicesOrd :: (Ordering -> Bool) -> ShortText -> TextArray -> [Int]
findIndicesOrd ordFilter needle ta = [ i | IdxOfsLen i ofs n <- listOfsLen ta, ordFilter (cmp ofs n) ]
where
!ba2 = st2ba needle
!ba2sz = sizeOfByteArray ba2
cmp !ofs1 !n = compareByteArray ba2 0 ba2sz (ta2ba ta) ofs1 n
indexOfsLen :: TextArray -> Int -> Maybe IdxOfsLen
indexOfsLen ta@(TA# ba) i
| i >= sz = Nothing
| i < 0 = Nothing
| otherwise
= let i' = i+1
ofs = indexIntArray ba i'
ofs' | i' == sz = sizeOfByteArray ba
| otherwise = indexIntArray ba (i'+1)
!e = IdxOfsLen i ofs (ofs'-ofs)
in Just e
where
sz = length ta
indexOfsLen' :: TextArray -> Int -> IdxOfsLen
indexOfsLen' ta@(TA# ba) i
| i >= sz = IdxOfsLen 0 0 0
| i < 0 = IdxOfsLen 0 0 0
| otherwise
= let i' = i+1
ofs = indexIntArray ba i'
ofs' | i' == sz = sizeOfByteArray ba
| otherwise = indexIntArray ba (i'+1)
in IdxOfsLen i ofs (ofs'-ofs)
where
sz = length ta
listOfsLen :: TextArray -> [IdxOfsLen]
listOfsLen ta@(TA# ba)
| sz == 0 = []
| otherwise = go 1 (indexIntArray ba 1)
where
sz = length ta
go !i !ofs | i > sz = []
| otherwise = e : go (i+1) ofs'
where
!e = IdxOfsLen (i-1) ofs n
n = ofs'-ofs
ofs' | i == sz = sizeOfByteArray ba
| otherwise = indexIntArray ba (i+1)
sumLen :: [Int] -> (Int,Int)
sumLen = go 0 0
where
go !s !l [] = (s,l)
go !s !l (x:xs) = go (s+x) (l+1) xs