Safe Haskell	None
Language	Haskell2010

Bio.Streaming.Bgzf

Description

Buffer builder to assemble Bgzf blocks. The idea is to serialize stuff (BAM and BCF) into a buffer, then bgzf chunks from the buffer. We use a large buffer, and we always make sure there is plenty of space in it (to avoid redundant checks).

Synopsis

bgunzip :: MonadIO m => ByteStream m r -> ByteStream m r
getBgzfHdr :: Monad m => ByteStream m r -> m (Maybe Int, ByteString, ByteStream m r)
data BB = BB {
- buffer :: !(ForeignPtr Word8)
- size :: !Int
- off :: !Int
- used :: !Int
- mark :: !Int
- mark2 :: !Int
}
newBuffer :: Int -> IO BB
fillBuffer :: BB -> BgzfTokens -> IO (BB, BgzfTokens)
expandBuffer :: Int -> BB -> IO BB
encodeBgzf :: MonadIO m => Int -> Stream (Of (Endo BgzfTokens)) m b -> ByteStream m b
data BgzfTokens
- = TkWord32 !Word32 BgzfTokens
- | TkWord16 !Word16 BgzfTokens
- | TkWord8 !Word8 BgzfTokens
- | TkFloat !Float BgzfTokens
- | TkDouble !Double BgzfTokens
- | TkString !ByteString BgzfTokens
- | TkDecimal !Int BgzfTokens
- | TkMemFill !Int !Word8 BgzfTokens
- | TkMemCopy !(Vector Word8) BgzfTokens
- | TkSetMark BgzfTokens
- | TkEndRecord BgzfTokens
- | TkEndRecordPart1 BgzfTokens
- | TkEndRecordPart2 BgzfTokens
- | TkEnd
- | TkBclSpecial !BclArgs BgzfTokens
- | TkLowLevel !Int (BB -> IO BB) BgzfTokens
data BclArgs = BclArgs BclSpecialType !(Vector Word8) !Int !Int !Int !Int
data BclSpecialType
- = BclNucsBin
- | BclNucsAsc
- | BclNucsAscRev
- | BclNucsWide
- | BclQualsBin
- | BclQualsAsc
- | BclQualsAscRev
loop_dec_int :: Ptr Word8 -> Int -> IO Int
loop_bcl_special :: Ptr Word8 -> BclArgs -> IO Int
data CompressionError = CompressionError !CInt
data DecompressionError = DecompressionError !CInt

Documentation

bgunzip :: MonadIO m => ByteStream m r -> ByteStream m r Source #

Decompresses a bgzip stream. Individual chunks are decompressed in parallel. Leftovers are discarded (some compressed HETFA files appear to have junk at the end).

getBgzfHdr :: Monad m => ByteStream m r -> m (Maybe Int, ByteString, ByteStream m r) Source #

data BB Source #

We manage a large buffer (multiple megabytes), of which we fill an initial portion. We remember the size, the used part, and two marks where we later fill in sizes for the length prefixed BAM or BCF records. We move the buffer down when we yield a piece downstream, and when we run out of space, we simply move to a new buffer. Garbage collection should take care of the rest. Unused mark must be set to (maxBound::Int) so it doesn't interfere with flushing.

Constructors

BB
Fields buffer :: !(ForeignPtr Word8) size :: !Int off :: !Int used :: !Int mark :: !Int mark2 :: !Int

newBuffer :: Int -> IO BB Source #

Creates a buffer.

fillBuffer :: BB -> BgzfTokens -> IO (BB, BgzfTokens) Source #

expandBuffer :: Int -> BB -> IO BB Source #

Creates a new buffer, copying the active content from an old one, with higher capacity. The size of the new buffer is twice the free space in the old buffer, but at least minsz.

encodeBgzf :: MonadIO m => Int -> Stream (Of (Endo BgzfTokens)) m b -> ByteStream m b Source #

Expand a chain of tokens into a buffer, sending finished pieces downstream as soon as possible.

data BgzfTokens Source #

Things we are able to encode. Taking inspiration from binary-serialise-cbor, we define these as a lazy list-like thing and consume it in a interpreter.

data BclArgs Source #

Constructors

BclArgs BclSpecialType !(Vector Word8) !Int !Int !Int !Int

data BclSpecialType Source #

Constructors

BclNucsBin
BclNucsAsc
BclNucsAscRev
BclNucsWide
BclQualsBin
BclQualsAsc
BclQualsAscRev

loop_dec_int :: Ptr Word8 -> Int -> IO Int Source #

loop_bcl_special :: Ptr Word8 -> BclArgs -> IO Int Source #

data CompressionError Source #

Constructors

CompressionError !CInt

Instances

Show CompressionError Source #
Instance details Defined in Bio.Streaming.Bgzf Methods showsPrec :: Int -> CompressionError -> ShowS # show :: CompressionError -> String # showList :: [CompressionError] -> ShowS #
Exception CompressionError Source #
Instance details Defined in Bio.Streaming.Bgzf Methods toException :: CompressionError -> SomeException # fromException :: SomeException -> Maybe CompressionError # displayException :: CompressionError -> String #

data DecompressionError Source #

Constructors

DecompressionError !CInt

Instances

Show DecompressionError Source #
Instance details Defined in Bio.Streaming.Bgzf Methods showsPrec :: Int -> DecompressionError -> ShowS # show :: DecompressionError -> String # showList :: [DecompressionError] -> ShowS #
Exception DecompressionError Source #
Instance details Defined in Bio.Streaming.Bgzf Methods toException :: DecompressionError -> SomeException # fromException :: SomeException -> Maybe DecompressionError # displayException :: DecompressionError -> String #

TkWord32 !Word32 BgzfTokens
TkWord16 !Word16 BgzfTokens
TkWord8 !Word8 BgzfTokens
TkFloat !Float BgzfTokens
TkDouble !Double BgzfTokens
TkString !ByteString BgzfTokens
TkDecimal !Int BgzfTokens
TkMemFill !Int !Word8 BgzfTokens
TkMemCopy !(Vector Word8) BgzfTokens
TkSetMark BgzfTokens
TkEndRecord BgzfTokens
TkEndRecordPart1 BgzfTokens
TkEndRecordPart2 BgzfTokens
TkEnd
TkBclSpecial !BclArgs BgzfTokens
TkLowLevel !Int (BB -> IO BB) BgzfTokens