{-# LANGUAGE DeriveDataTypeable    #-}
{-# LANGUAGE DeriveGeneric         #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedLists       #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE MultiWayIf            #-}
{-# LANGUAGE PackageImports        #-}
{-# LANGUAGE RecordWildCards       #-}
{-# LANGUAGE TemplateHaskell       #-}
{-# LANGUAGE TypeFamilies          #-}
{-# Language QuasiQuotes           #-}

-- |
-- Module      :  Data.SAM.Version1_6.Read.Base
-- Copyright   :  (c) Matthew Mosior 2023
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = WARNING
--
-- This module is considered __internal__.
--
-- The Package Versioning Policy __does not apply__.
--
-- The contents of this module may change __in any way whatsoever__
-- and __without any warning__ between minor versions of this package.
--
-- Authors importing this library are expected to track development
-- closely.
--
-- All credit goes to the author(s)/maintainer(s) of the
-- [containers](https://hackage.haskell.org/package/containers) library
-- for the above warning text.
--
-- = Description
--
-- This library enables the decoding/encoding of SAM, BAM and CRAM file formats.

module Data.SAM.Version1_6.Read.Base ( -- * Reading
                                       readSAM_V1_6
                                     ) where

import Data.SAM.Version1_6.Base
import Data.SAM.Version1_6.Read.Parser.Header.HD.Base
import Data.SAM.Version1_6.Read.Parser.Header.SQ.Base
import Data.SAM.Version1_6.Read.Parser.Header.RG.Base
import Data.SAM.Version1_6.Read.Parser.Header.PG.Base
import Data.SAM.Version1_6.Read.Parser.Header.CO.Base
import Data.SAM.Version1_6.Read.Parser.Alignment.Base

import Data.Attoparsec.ByteString.Char8  as DABC8
import Data.Attoparsec.ByteString.Lazy   as DABL
import Data.ByteString.Lazy              as DBL
import Data.Sequence                     as DSeq
import qualified Streamly.Data.Stream    as S
import Streamly.External.ByteString.Lazy as StreamlyLByteString (fromChunksIO)
import Streamly.Internal.FileSystem.File as StreamlyInternalFile (chunkReader)

-- | Make a parser optional.
--
-- Runs the supplied parser and wraps its result in 'Just'; if the parser
-- fails, 'Nothing' is returned instead (via 'option').
maybeOption :: Parser a
            -> Parser (Maybe a)
maybeOption p = option Nothing (Just <$> p)

-- | Define the @"SAM_V1_6"@ parser.
--
-- The sections are parsed in a fixed order: file-level metadata,
-- reference sequence dictionary, read group, program, one-line comment
-- and finally the alignment records.
--
-- Every header section is wrapped in 'maybeOption' and each of its lines
-- must be terminated by 'endOfLine'. After every header section a single
-- line-feed byte (@word8 10@) is additionally required.
--
-- The list-valued sections are converted to 'DSeq.Seq' with 'DSeq.fromList'.
--
-- NOTE(review): the mandatory @word8 10@ after each section means an extra
-- LF is consumed on top of the one matched by 'endOfLine' -- confirm against
-- the section parsers that this is the intended input layout.
--
-- NOTE(review): 'DABL.many'' presumably succeeds with an empty list when
-- nothing matches, in which case the surrounding 'maybeOption' would yield
-- @Just []@ rather than 'Nothing' for an absent section -- TODO confirm.
parse_SAM_V1_6 :: Parser SAM_V1_6
parse_SAM_V1_6 = do
  filelevelmetadata           <- maybeOption $ parse_SAM_V1_6_File_Level_Metadata <* endOfLine
  _                           <- word8 10
  referencesequencedictionary <- maybeOption $ DABL.many' $ parse_SAM_V1_6_Reference_Sequence_Dictionary <* endOfLine
  _                           <- word8 10
  readgroup                   <- maybeOption $ DABL.many' $ parse_SAM_V1_6_Read_Group <* endOfLine
  _                           <- word8 10
  program                     <- maybeOption $ parse_SAM_V1_6_Program <* endOfLine
  _                           <- word8 10
  onelinecomment              <- maybeOption $ DABL.many' $ parse_SAM_V1_6_One_Line_Comment <* endOfLine
  _                           <- word8 10
  alignment                   <- DABL.many' $ parse_SAM_V1_6_Alignment <* endOfLine
  return SAM_V1_6 { sam_v1_6_file_level_metadata           = filelevelmetadata
                    -- 'fmap' over 'Maybe' keeps 'Nothing' as 'Nothing' and
                    -- converts the list inside 'Just' to a 'Seq'.
                  , sam_v1_6_reference_sequence_dictionary = DSeq.fromList <$> referencesequencedictionary
                  , sam_v1_6_read_group                    = DSeq.fromList <$> readgroup
                  , sam_v1_6_program                       = program
                  , sam_v1_6_one_line_comment              = DSeq.fromList <$> onelinecomment
                  , sam_v1_6_alignment                     = DSeq.fromList alignment
                  }

-- | Run the @"SAM_V1_6"@ parser.
--
-- Applies 'parse_SAM_V1_6' to the given lazy 'DBL.ByteString' using
-- 'DABL.parseOnly'. On success the parsed value is returned; on failure
-- the parser's error message is raised with 'error'.
readSAM_V1_6_LBS :: DBL.ByteString
                 -> IO SAM_V1_6
readSAM_V1_6_LBS lbs =
  case DABL.parseOnly parse_SAM_V1_6 lbs of
    Left  samparseerror -> error samparseerror
    Right sam           -> return sam

-- | Read a @"SAM_V1_6"@ from a file.
-- The file is checked for errors as it parses
-- the SAM file.
--
-- The file is read as a stream of chunks ('StreamlyInternalFile.chunkReader'),
-- the chunks are assembled into a lazy ByteString
-- ('StreamlyLByteString.fromChunksIO'), and the result is handed to
-- 'readSAM_V1_6_LBS', which raises an error if parsing fails.
--
-- See the [SAM v1.6](http://samtools.github.io/hts-specs/SAMv1.pdf) specification documentation.
readSAM_V1_6 :: FilePath -- ^ Path to SAM file.
             -> IO SAM_V1_6
readSAM_V1_6 fp = do
  let lazysamfile = S.unfold StreamlyInternalFile.chunkReader fp
  lazysamfilef    <- StreamlyLByteString.fromChunksIO lazysamfile
  readSAM_V1_6_LBS lazysamfilef