-- | PopKey gives you a static key-value storage structure backed by poppy indices. Construction is slow (multiple passes are made over the data to choose a good indexing structure), but querying should be fast, and space overhead should be much lower than Data.Map—on the data set I'm working with, Data.Map has 8.3x more overhead than PopKey—and the raw data transparently lives in an mmap'd region if you use @storage@, meaning the actual memory needed for usage is very low.
--
-- To construct, you will need @PopKeyEncoding@ instances. You may choose the granularity by which you encode your data types by choosing one of two auto-deriving patterns. The first, implicitly derived via GHC Generics, will use a granular encoding, indexing fields separately internally, while the second, derived via the @StoreBlob@ newtype, will encode the data as a single unit. Which is better depends on the situation, but as a general rule you should pack your constant-size structures into a single blob while letting your variable-sized fields use the granular encoding.
--
-- @
-- -- Encode @MyType@ with separate indices for the @[ String ]@ and @String@ fields.
-- data MyType = MyType [ String ] String
--   deriving (Generic,PopKeyEncoding)
-- @
-- 
-- @
-- -- Encode @Point@ as a blob, with all 3 @Int@ fields stored contiguously.
-- data Point = Point Int Int Int
--   deriving (Generic,Store) -- @Store@ here is from Data.Store
--   deriving PopKeyEncoding via StoreBlob Point
-- @
--
-- Reading from and storing to disk come pre-packaged, in such a way that loading your structure from the disk will strictly load the small index metadata while leaving the large raw data to be backed by mmap. You may use this functionality as follows:
--
-- @
-- myData :: PopKeyStore Point MyType
-- myData = storage "myindex.poppy"
-- 
-- main :: IO ()
-- main = do
--   -- your data
--   let dat :: [ (Point , MyType) ] = ...
-- 
--   -- store the indexed data to disk
--   storePopKey myData dat
-- 
--   -- load the indexed data from disk
--   pk :: PopKey Point MyType <- loadPopKey myData
-- 
--   ...
-- @
--
-- Poppy natively supports array-style indexing, so if your "key" set is simply the dense set of integers  @[ 0 .. n - 1 ]@ where @n@ is the number of items in your data set, key storage may be left implicit and elided entirely. In this API, when the distinction is necessary, working with such an implicit index is signified by a trailing ', e.g., @storage@ vs @storage'@.
--
-- Note that constant-factor space & time overhead is fairly high, so unless you have at least a couple thousand items, it is recommended to avoid PopKey. Once you have 10k+ items, the asymptotics should win out, and PopKey should perform well.

module PopKey
       ( type PopKey
       , (!)
       , PopKey.lookup
       , makePopKey
       , makePopKey'
       , foldrWithKey
       , foldlWithKey'
       , storage
       , storage'
       , StoreBlob(..)
       , PopKeyEncoding
       , PopKeyStore
       , PopKeyStore'
       , StorePopKey(..)
       ) where

import qualified Data.ByteString as BS
import Data.Store (encode , decodeEx)
import GHC.Word
import HaskellWorks.Data.FromForeignRegion
import System.IO

import PopKey.Internal2
import PopKey.Internal3
import PopKey.Encoding


{-# INLINE (!) #-}
-- | Lookup by a key known to be in the structure.
(!) :: PopKeyEncoding k => PopKey k v -> k -> v
! :: forall k v. PopKeyEncoding k => PopKey k v -> k -> v
(!) (PopKeyInt Bool
_ F s PKPrim
p F' s ByteString -> v
vd) k
k = F' s ByteString -> v
vd do forall s. Int -> F s PKPrim -> F' s ByteString
rawq k
k F s PKPrim
p
(!) (PopKeyAny Bool
_ F s PKPrim
pv F' s ByteString -> v
vd F (Shape k) PKPrim
pk) k
k =
  F' s ByteString -> v
vd do forall s. Int -> F s PKPrim -> F' s ByteString
rawq (forall s. F s PKPrim -> F' s ByteString -> Int -> Int -> Int
bin_search2 F (Shape k) PKPrim
pk (forall a. PopKeyEncoding a => a -> F' (Shape a) ByteString
pkEncode k
k) Int
0 (forall s. F s PKPrim -> Int
flength F (Shape k) PKPrim
pk forall a. Num a => a -> a -> a
- Int
1)) F s PKPrim
pv

{-# INLINE lookup #-}
-- | Lookup by a key which may or may not be in the structure.
lookup :: PopKeyEncoding k => PopKey k v -> k -> Maybe v
lookup :: forall k v. PopKeyEncoding k => PopKey k v -> k -> Maybe v
lookup s :: PopKey k v
s@(PopKeyInt Bool
_ F s PKPrim
p F' s ByteString -> v
vd) k
i = if k
i forall a. Ord a => a -> a -> Bool
>= k
0 Bool -> Bool -> Bool
&& k
i forall a. Ord a => a -> a -> Bool
< forall (t :: * -> *) a. Foldable t => t a -> Int
length PopKey k v
s
  then forall a. a -> Maybe a
Just (F' s ByteString -> v
vd do forall s. Int -> F s PKPrim -> F' s ByteString
rawq k
i F s PKPrim
p)
  else forall a. Maybe a
Nothing
lookup (PopKeyAny Bool
_ F s PKPrim
pv F' s ByteString -> v
vd F (Shape k) PKPrim
pk) k
k = do
  let i :: Int
i = forall s. F s PKPrim -> F' s ByteString -> Int -> Int -> Int
bin_search2 F (Shape k) PKPrim
pk (forall a. PopKeyEncoding a => a -> F' (Shape a) ByteString
pkEncode k
k) Int
0 (forall s. F s PKPrim -> Int
flength F (Shape k) PKPrim
pk forall a. Num a => a -> a -> a
- Int
1)
  if Int
i forall a. Eq a => a -> a -> Bool
== -Int
1
     then forall a. Maybe a
Nothing
     else forall a. a -> Maybe a
Just (F' s ByteString -> v
vd do forall s. Int -> F s PKPrim -> F' s ByteString
rawq Int
i F s PKPrim
pv)


-- | You may use @storage@ to gain a pair of operations to serialize and read your structure from disk. This will be more efficient than if you naively serialize and store the data, as it strictly reads index metadata into memory while leaving the larger raw chunks to be backed by mmap.
storage :: (PopKeyEncoding k , PopKeyEncoding v)
        => FilePath -> PopKeyStore k v
storage :: forall k v.
(PopKeyEncoding k, PopKeyEncoding v) =>
FilePath -> PopKeyStore k v
storage FilePath
p =
  forall k v.
(forall (f :: * -> *). Foldable f => f (k, v) -> IO ())
-> IO (PopKey k v) -> PopKeyStore k v
PopKeyStore
    do \f (k, v)
d -> do
         let (ByteString
b1,ByteString
b2) = forall a. BiSerialize a => a -> (ByteString, ByteString)
bencode (forall k v. PopKey k v -> SPopKey k v
toSPopKey (forall (f :: * -> *) k v.
(Foldable f, PopKeyEncoding k, PopKeyEncoding v) =>
f (k, v) -> PopKey k v
makePopKey f (k, v)
d))
         forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withBinaryFile FilePath
p IOMode
WriteMode \Handle
fh -> do
           Handle -> ByteString -> IO ()
BS.hPut Handle
fh (forall a. Store a => a -> ByteString
encode (forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
BS.length ByteString
b1) :: Word64))
           Handle -> ByteString -> IO ()
BS.hPut Handle
fh ByteString
b1
           Handle -> ByteString -> IO ()
BS.hPut Handle
fh ByteString
b2
    do Handle
fh <- FilePath -> IOMode -> IO Handle
openBinaryFile FilePath
p IOMode
ReadMode
       Word64
w64 :: Word64 <- forall a. Store a => ByteString -> a
decodeEx forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Handle -> Int -> IO ByteString
BS.hGet Handle
fh Int
8
       let s :: Int
s = forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
w64
       ByteString
b1 <- Handle -> Int -> IO ByteString
BS.hGet Handle
fh Int
s
       Handle -> IO ()
hClose Handle
fh
       ByteString
b2 <- Int -> ByteString -> ByteString
BS.drop (Int
8 forall a. Num a => a -> a -> a
+ Int
s) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. FromForeignRegion a => FilePath -> IO a
mmapFromForeignRegion FilePath
p
       forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall k v.
(PopKeyEncoding k, PopKeyEncoding v) =>
SPopKey k v -> PopKey k v
fromSPopKey (forall a. BiSerialize a => (ByteString, ByteString) -> a
bdecode (ByteString
b1,ByteString
b2)))

-- | Like @storage@, but for canonical integer keys.
storage' :: PopKeyEncoding v
         => FilePath -> PopKeyStore' v
storage' :: forall v. PopKeyEncoding v => FilePath -> PopKeyStore' v
storage' FilePath
p = forall v.
(forall (f :: * -> *). Foldable f => f v -> IO ())
-> IO (PopKey Int v) -> PopKeyStore' v
PopKeyStore'
  do \f v
d -> do
       let (ByteString
b1,ByteString
b2) = forall a. BiSerialize a => a -> (ByteString, ByteString)
bencode (forall k v. PopKey k v -> SPopKey k v
toSPopKey (forall (f :: * -> *) v.
(Foldable f, PopKeyEncoding v) =>
f v -> PopKey Int v
makePopKey' f v
d))
       forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withBinaryFile FilePath
p IOMode
WriteMode \Handle
fh -> do
         Handle -> ByteString -> IO ()
BS.hPut Handle
fh (forall a. Store a => a -> ByteString
encode (forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
BS.length ByteString
b1) :: Word64))
         Handle -> ByteString -> IO ()
BS.hPut Handle
fh ByteString
b1
         Handle -> ByteString -> IO ()
BS.hPut Handle
fh ByteString
b2
  do Handle
fh <- FilePath -> IOMode -> IO Handle
openBinaryFile FilePath
p IOMode
ReadMode
     Word64
w64 :: Word64 <- forall a. Store a => ByteString -> a
decodeEx forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Handle -> Int -> IO ByteString
BS.hGet Handle
fh Int
8
     let s :: Int
s = forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
w64
     ByteString
b1 <- Handle -> Int -> IO ByteString
BS.hGet Handle
fh Int
s
     Handle -> IO ()
hClose Handle
fh
     ByteString
b2 <- Int -> ByteString -> ByteString
BS.drop (Int
8 forall a. Num a => a -> a -> a
+ Int
s) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. FromForeignRegion a => FilePath -> IO a
mmapFromForeignRegion FilePath
p
     forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall v. PopKeyEncoding v => SPopKey Int v -> PopKey Int v
fromSPopKey' (forall a. BiSerialize a => (ByteString, ByteString) -> a
bdecode (ByteString
b1,ByteString
b2)))