{-# OPTIONS_GHC-funbox-strict-fields #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}
module HaskellWorks.Data.RankSelect.CsPoppy.Internal
( CsInterleaved(..)
, getCsiX
, getCsiA
, getCsiB
, getCsiC
, getCsiTotal
, mkCsi
, putCsiX
, putCsiA
, putCsiB
, putCsiC
, indexOrZero
, lastOrZero
, makeCsPoppyBlocks1
, makeCsPoppyBlocks2
, makeCsPoppyLayerM
, makeCsPoppyLayerM2
, genCsSamples
) where
import Data.Word
import Foreign.Ptr
import Foreign.Storable
import HaskellWorks.Data.AtIndex
import HaskellWorks.Data.Bits.BitWise
import HaskellWorks.Data.Bits.PopCount.PopCount1
import HaskellWorks.Data.Positioning
import HaskellWorks.Data.RankSelect.Base.Select1
import qualified Control.Monad.ST as ST
import qualified Data.Vector.Storable as DVS
import qualified Data.Vector.Storable.Mutable as DVSM
newtype CsInterleaved = CsInterleaved { unCsInterleaved :: Word64 } deriving Eq
instance Storable CsInterleaved where
sizeOf = sizeOf . unCsInterleaved
{-# INLINE sizeOf #-}
alignment = alignment . unCsInterleaved
{-# INLINE alignment #-}
peekElemOff ptr i = CsInterleaved <$> peekElemOff (castPtr ptr) i
{-# INLINE peekElemOff #-}
pokeElemOff ptr i = pokeElemOff (castPtr ptr) i . unCsInterleaved
{-# INLINE pokeElemOff #-}
mkCsi :: Word64 -> Word64 -> Word64 -> Word64 -> CsInterleaved
mkCsi x a b c = CsInterleaved
$ ((x .&. 0xffffffff) .<. 0)
.|. ((a .&. 0x3ff) .<. 32)
.|. ((b .&. 0x3ff) .<. 42)
.|. ((c .&. 0x3ff) .<. 52)
getCsiX :: CsInterleaved -> Word64
getCsiX (CsInterleaved i) = i .&. 0xffffffff
getCsiA :: CsInterleaved -> Word64
getCsiA (CsInterleaved i) = (i .>. 32) .&. 0x3ff
getCsiB :: CsInterleaved -> Word64
getCsiB (CsInterleaved i) = (i .>. 42) .&. 0x3ff
getCsiC :: CsInterleaved -> Word64
getCsiC (CsInterleaved i) = (i .>. 52) .&. 0x3ff
getCsiTotal :: CsInterleaved -> Word64
getCsiTotal csi = getCsiX csi + getCsiA csi + getCsiB csi + getCsiC csi
putCsiX :: Word64 -> CsInterleaved -> CsInterleaved
putCsiX v (CsInterleaved i) = CsInterleaved (((v .&. 0xffffffff) .<. 0) .|. (i .&. 0xffffffff00000000))
putCsiA :: Word64 -> CsInterleaved -> CsInterleaved
putCsiA v (CsInterleaved i) = CsInterleaved (((v .&. 0x3ff) .<. 32) .|. (i .&. 0xfffffc00ffffffff))
putCsiB :: Word64 -> CsInterleaved -> CsInterleaved
putCsiB v (CsInterleaved i) = CsInterleaved (((v .&. 0x3ff) .<. 42) .|. (i .&. 0xfff003ffffffffff))
putCsiC :: Word64 -> CsInterleaved -> CsInterleaved
putCsiC v (CsInterleaved i) = CsInterleaved (((v .&. 0x3ff) .<. 52) .|. (i .&. 0xc00fffffffffffff))
instance Show CsInterleaved where
showsPrec _ i = shows (getCsiX i, getCsiA i, getCsiB i, getCsiC i)
makeCsPoppyBlocks1 :: DVS.Vector Word64 -> DVS.Vector Word64
makeCsPoppyBlocks1 v = DVS.constructN (((DVS.length v + 8 - 1) `div` 8) + 1) genBlocks
where genBlocks :: DVS.Vector Word64 -> Word64
genBlocks u = let i = DVS.length u in popCount1 (DVS.take 8 (DVS.drop (i * 8) v))
makeCsPoppyBlocks2 :: DVS.Vector Word64 -> DVS.Vector Word64
makeCsPoppyBlocks2 v = DVS.constructN (((DVS.length v + 8 - 1) `div` 8) + 1) genBlocks
where e = DVS.length v
genBlocks :: DVS.Vector Word64 -> Word64
genBlocks u = let i = DVS.length u in if
| (i + 1) * 8 <= e -> let j = i * 8 in
popCount1 (DVS.unsafeIndex v (j + 0)) +
popCount1 (DVS.unsafeIndex v (j + 1)) +
popCount1 (DVS.unsafeIndex v (j + 2)) +
popCount1 (DVS.unsafeIndex v (j + 3)) +
popCount1 (DVS.unsafeIndex v (j + 4)) +
popCount1 (DVS.unsafeIndex v (j + 5)) +
popCount1 (DVS.unsafeIndex v (j + 6)) +
popCount1 (DVS.unsafeIndex v (j + 7))
| otherwise -> popCount1 (DVS.take 8 (DVS.drop (i * 8) v))
makeCsPoppyLayerM :: DVS.Vector Word64 -> DVS.Vector Word64
makeCsPoppyLayerM blocks = DVS.constructN (((DVS.length blocks + 4 - 1) `div` 4) + 1) genLayer1
where genLayer1 :: DVS.Vector Word64 -> Word64
genLayer1 u =
let i = end u in
let lx = lastOrZero u in
let la = indexOrZero blocks (i * 4 - 4) in
let lb = indexOrZero blocks (i * 4 - 3) in
let lc = indexOrZero blocks (i * 4 - 2) in
let ld = indexOrZero blocks (i * 4 - 1) in
let nx = lx + (la + lb + lc + ld) in
let na = indexOrZero blocks (i * 4 + 0) in
let nb = indexOrZero blocks (i * 4 + 1) in
let nc = indexOrZero blocks (i * 4 + 2) in
( ( nx .&. 0x00000000ffffffff)
.|. ((na .<. 32) .&. 0x000003ff00000000)
.|. ((nb .<. 42) .&. 0x000ffc0000000000)
.|. ((nc .<. 52) .&. 0x3ff0000000000000))
makeCsPoppyLayerM2 :: DVS.Vector Word64 -> DVS.Vector Word64
makeCsPoppyLayerM2 blocks = DVS.constructN (((DVS.length blocks + 4 - 1) `div` 4) + 1) genLayer1
where genLayer1 :: DVS.Vector Word64 -> Word64
genLayer1 u = let ui = end u in if ui > 0 && ui * 4 + 4 < end blocks
then let lx = DVS.unsafeIndex u (fromIntegral (ui - 1)) in
let la = DVS.unsafeIndex blocks (fromIntegral (ui * 4 - 4)) in
let lb = DVS.unsafeIndex blocks (fromIntegral (ui * 4 - 3)) in
let lc = DVS.unsafeIndex blocks (fromIntegral (ui * 4 - 2)) in
let ld = DVS.unsafeIndex blocks (fromIntegral (ui * 4 - 1)) in
let nx = lx + (la + lb + lc + ld) in
let na = DVS.unsafeIndex blocks (fromIntegral (ui * 4 + 0)) in
let nb = DVS.unsafeIndex blocks (fromIntegral (ui * 4 + 1)) in
let nc = DVS.unsafeIndex blocks (fromIntegral (ui * 4 + 2)) in
( ( nx .&. 0x00000000ffffffff)
.|. ((na .<. 32) .&. 0x000003ff00000000)
.|. ((nb .<. 42) .&. 0x000ffc0000000000)
.|. ((nc .<. 52) .&. 0x3ff0000000000000))
else let lx = lastOrZero u in
let la = indexOrZero blocks (ui * 4 - 4) in
let lb = indexOrZero blocks (ui * 4 - 3) in
let lc = indexOrZero blocks (ui * 4 - 2) in
let ld = indexOrZero blocks (ui * 4 - 1) in
let nx = lx + (la + lb + lc + ld) in
let na = indexOrZero blocks (ui * 4 + 0) in
let nb = indexOrZero blocks (ui * 4 + 1) in
let nc = indexOrZero blocks (ui * 4 + 2) in
( ( nx .&. 0x00000000ffffffff)
.|. ((na .<. 32) .&. 0x000003ff00000000)
.|. ((nb .<. 42) .&. 0x000ffc0000000000)
.|. ((nc .<. 52) .&. 0x3ff0000000000000))
indexOrZero :: DVS.Vector Word64 -> Position -> Word64
indexOrZero _ i | i < 0 = 0
indexOrZero v i | i < end v = v !!! i
indexOrZero _ _ = 0
{-# INLINE indexOrZero #-}
lastOrZero :: DVS.Vector Word64 -> Word64
lastOrZero v | 0 < end v = DVS.last v
lastOrZero _ = 0
{-# INLINE lastOrZero #-}
sampleWidth :: Count
sampleWidth = 8192
{-# INLINE sampleWidth #-}
genCsSamples :: Count -> DVS.Vector Word64 -> DVS.Vector Word64
genCsSamples pc v = DVS.create (genCsSamplesST pc v)
genCsSamplesST :: Count -> DVS.Vector Word64 -> (forall s . ST.ST s (DVS.MVector s Word64))
genCsSamplesST pc v = do
u :: DVSM.MVector s Word64 <- DVSM.unsafeNew maxSize
go u 0 0 (DVS.length v) 0 1
where maxSize :: Int
maxSize = fromIntegral $ (pc `div` 8192) + 1
go :: DVS.MVector s Word64 -> Int -> Int -> Int -> Count -> Count -> ST.ST s (DVS.MVector s Word64)
go u ui vi vie lpc epc | vi < vie = do
let vw = DVS.unsafeIndex v vi
let vwpc = popCount1 vw
let npc = vwpc + lpc
if npc >= epc
then do
let dpc = epc - lpc
let ewp = select1 vw dpc + fromIntegral vi * 64
DVSM.unsafeWrite u ui ewp
go u (ui + 1) (vi + 1) vie npc (epc + sampleWidth)
else do
go u ui (vi + 1) vie npc epc
go u ui _ _ _ _ = return (DVSM.take ui u)