{-# LANGUAGE BangPatterns #-}
module Data.Vector.Bloom.Util
  ( rehash
  , optimalHashes
  , optimalWidth
  ) where

import Data.Bits
import Data.Hashable

pepper :: Int
pepper = 0x53dffa872f4d7341

-- | Compute several hashes using a variant of
-- <http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf Kirsch and Mitzenmacher>'s
-- double hashing.
rehash :: Hashable a => Int -> a -> [Int]
rehash k a = go k where
  go 0 = []
  go i = h : go (i - 1) where !h = h1 + shiftR h2 i
  !h1 = hash a
  !h2 = hashWithSalt pepper a

-- * Utility Functions

-- |
-- @optimalHashes n m@ calculates the optimal number of hash functions for a given number of entries @n@ in a
-- Bloom filter that is @m@ bits wide.
--
-- @k = m/n log 2@
optimalHashes :: Int -> Int -> Int
optimalHashes n m = ceiling (fromIntegral m / fromIntegral n * log 2 :: Double)

-- |
-- @optimalWidth n p@ calculate the optimal width @m@ of a bloom filter given the expected number of entries @n@
-- and target failure rate @p@ at capacity.
--
-- @m = -n log p / (log 2)^2@
optimalWidth :: Int -> Double -> Int
optimalWidth n p = ceiling (-1 * fromIntegral n * log p / log 2 / log 2 :: Double)