-- | Access to bigBed files on top of the generic bbi reader in "Data.BBI".
--
-- A 'BBedFile' is obtained with 'openBBedFile', searched with 'query' and
-- released with 'closeBBedFile'.
module Data.BBI.BigBed
( BBedFile
, openBBedFile
, closeBBedFile
, query
) where
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.Map as M
import Conduit
import Data.BBI
import Data.BBI.Utils
-- | An opened bbi file whose header declares the 'BigBed' file type.
-- The constructor is not exported; values are created by 'openBBedFile'.
newtype BBedFile = BBedFile BbiFile
-- | Open a bigBed file.
--
-- The file is opened with 'openBbiFile' and the file type recorded in its
-- header is inspected. If it is 'BigBed' the file is wrapped in a 'BBedFile'.
-- Any other file type is rejected by calling 'error' with the message
-- @"not a bigbed file"@.
--
-- On rejection the underlying bbi file is closed with 'closeBbiFile' before
-- the error is raised, so a failed open does not leave the file open.
openBBedFile :: FilePath -> IO BBedFile
openBBedFile fl = do
    bbi <- openBbiFile fl
    case (_filetype . _header) bbi of
        BigBed -> return $ BBedFile bbi
        _ -> do
            -- Release what 'openBbiFile' acquired; nobody else holds 'bbi'.
            closeBbiFile bbi
            error "not a bigbed file"
-- | Close a bigBed file by closing the bbi file it wraps.
closeBBedFile :: BBedFile -> IO ()
closeBBedFile bed = case bed of
    BBedFile bbi -> closeBbiFile bbi
-- | Stream the records of a bigBed file that overlap a query region.
--
-- The region is @(chromosome name, start, end)@. The chromosome name is
-- resolved to a numeric id with 'getChromId'; when the name is unknown the
-- conduit yields nothing. Otherwise the data blocks reported by
-- 'overlappingBlocks' are read and decoded, and every record that lies on the
-- queried chromosome and overlaps @(start, end)@ is yielded as
-- @(chromosome name, start, end, rest)@, where @rest@ is the remaining raw
-- bytes of the record.
--
-- Records are matched on chromosome id as well as on coordinates, so a block
-- that also holds records of another chromosome cannot leak them into the
-- result.
--
-- NOTE(review): the overlap test is inclusive on both ends (see
-- 'isOverlapped'). BED coordinates are usually half-open, so a record that
-- merely touches the query boundary is reported -- confirm this is intended.
query :: (B.ByteString, Int, Int)
      -> BBedFile
      -> ConduitT () (B.ByteString, Int, Int, B.ByteString) IO ()
query (chr, s, e) (BBedFile fl) = case getChromId fl chr of
    Nothing -> return ()
    Just i -> do
        blks <- lift $ overlappingBlocks fl (i, s, e)
        readBlocks fl blks .| concatMapC (hits i)
  where
    endi = _endian $ _header fl

    -- Inverse of the chromosome tree: numeric id -> chromosome name.
    id2Chr = M.fromList
        [ (cid, name) | (name, (cid, _)) <- M.toList (_chromTree fl) ]

    -- Matching records of one raw block, with the id replaced by the name.
    -- The lookup is total: a record whose id has no name is skipped rather
    -- than producing a bottom value in the output.
    hits i blk =
        [ (name, s', e', rest)
        | (cid, s', e', rest) <- decodeBlock endi blk
        , cid == i
        , isOverlapped (s', e') (s, e)
        , Just name <- [M.lookup cid id2Chr]
        ]
-- | Test whether two ranges, each given as @(low, high)@, intersect.
--
-- Both bounds are treated as inclusive: ranges that only touch (the high end
-- of one equals the low end of the other) count as overlapping.
isOverlapped :: (Int, Int) -> (Int, Int) -> Bool
isOverlapped (a0, a1) (b0, b1) = b0 <= a1 && a0 <= b1
{-# INLINE isOverlapped #-}
-- | Decode a raw data block into its records.
--
-- Each record consists of three 4-byte fields at offsets 0, 4 and 8, decoded
-- with 'readInt32' using the given endianness, followed by the bytes up to
-- (not including) the next NUL byte. The result for each record is
-- @(first field, second field, third field, remaining bytes)@; 'query' uses
-- the fields as chromosome id, start and end.
--
-- Decoding stops when the input is exhausted. A final record that is missing
-- its NUL terminator is still returned, and the list then ends normally
-- instead of failing on the empty remainder.
decodeBlock :: Endianness
            -> B.ByteString
            -> [(Int, Int, Int, B.ByteString)]
decodeBlock e = go
  where
    go blk
        | B.null blk = []
        | otherwise = (chr, start, end, rest) : go (B.drop 1 remain)
      where
        -- 4-byte field starting at the given byte offset of this record.
        field off = readInt32 e $ B.take 4 $ B.drop off blk
        chr = field 0
        start = field 4
        end = field 8
        -- 'remain' starts at the NUL terminator (if any); 'B.drop 1' skips
        -- it and, unlike 'B.tail', is total on an empty remainder.
        (rest, remain) = BC.break (== '\0') $ B.drop 12 blk
{-# INLINE decodeBlock #-}