Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data BamIndex a = BamIndex {
- minshift :: !Int
- depth :: !Int
- unaln_off :: !Int64
- extensions :: a
- refseq_bins :: !(Vector Bins)
- refseq_ckpoints :: !(Vector Ckpoints)
- withIndexedBam :: (MonadIO m, MonadLog m, MonadMask m) => FilePath -> (BamMeta -> BamIndex () -> Handle -> m r) -> m r
- readBamIndex :: FilePath -> IO (BamIndex ())
- readBaiIndex :: MonadIO m => ByteStream m r -> m (BamIndex ())
- readTabix :: MonadIO m => ByteStream m r -> m TabIndex
- data IndexFormatError = IndexFormatError Bytes
- data Region = Region {}
- newtype Subsequence = Subsequence (IntMap Int)
- streamBamRefseq :: (MonadIO m, MonadLog m) => BamIndex b -> Handle -> Refseq -> Stream (Of BamRaw) m ()
- streamBamRegions :: MonadIO m => BamIndex b -> Handle -> [Region] -> Stream (Of BamRaw) m ()
- streamBamSubseq :: MonadIO m => BamIndex b -> Handle -> Refseq -> Subsequence -> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ())
- streamBamUnaligned :: MonadIO m => BamIndex b -> Handle -> Stream (Of BamRaw) m ()
Documentation
Full index, unifying BAI and CSI style. In both cases, we have the
binning scheme, parameters are fixed in BAI, but variable in CSI.
Checkpoints are created from the linear index in BAI or from the
loffset
field in CSI.
BamIndex | |
|
withIndexedBam :: (MonadIO m, MonadLog m, MonadMask m) => FilePath -> (BamMeta -> BamIndex () -> Handle -> m r) -> m r Source #
readBamIndex :: FilePath -> IO (BamIndex ()) Source #
Reads any index we can find for a file.
If the file name has a .bai or .csi extension, optionally followed by .gz, we read it. Else we look for the index by adding such an extension and by replacing the extension with these two, and finally try the file itself. The first file that exists is used.
readBaiIndex :: MonadIO m => ByteStream m r -> m (BamIndex ()) Source #
Reads an index in BAI or CSI format, recognized automatically. The index can be compressed, even though this isn't standard.
readTabix :: MonadIO m => ByteStream m r -> m TabIndex Source #
Reads a Tabix index. Note that tabix indices are compressed, this is taken care of automatically.
data IndexFormatError Source #
Instances
Show IndexFormatError Source # | |
Defined in Bio.Bam.Index showsPrec :: Int -> IndexFormatError -> ShowS # show :: IndexFormatError -> String # showList :: [IndexFormatError] -> ShowS # | |
Exception IndexFormatError Source # | |
Defined in Bio.Bam.Index |
newtype Subsequence Source #
A mostly contiguous subset of a sequence, stored as a set of
non-overlapping intervals in an IntMap
from start position to end
position (half-open intervals, naturally).
Instances
Show Subsequence Source # | |
Defined in Bio.Bam.Regions showsPrec :: Int -> Subsequence -> ShowS # show :: Subsequence -> String # showList :: [Subsequence] -> ShowS # |
streamBamRefseq :: (MonadIO m, MonadLog m) => BamIndex b -> Handle -> Refseq -> Stream (Of BamRaw) m () Source #
Streams one reference from a bam file.
Seeks to a given sequence in a Bam file and enumerates only those
records aligning to that reference. We use the first checkpoint
available for the sequence, which an appropriate index. Streams the
BamRaw
records of the correct reference sequence only, and produces an
empty stream if the sequence isn't found.
streamBamRegions :: MonadIO m => BamIndex b -> Handle -> [Region] -> Stream (Of BamRaw) m () Source #
streamBamSubseq :: MonadIO m => BamIndex b -> Handle -> Refseq -> Subsequence -> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m (Stream (Of BamRaw) m ()) Source #
streamBamUnaligned :: MonadIO m => BamIndex b -> Handle -> Stream (Of BamRaw) m () Source #
Reads from a Bam file the part with unaligned reads.
Sort of the dual to streamBamRefseq
. Since the index does not
actually point to the unaligned part at the end, we use a best guess at
where the unaligned stuff might start, then skip over any aligned
records. Our "fallback guess" is to decode from the current position;
this only works if something else already consumed the Bam header.