{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module SLynx.Filter.Filter
( filterRowsCmd,
filterColsCmd,
)
where
import Control.Monad (when)
import Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Maybe (fromMaybe)
import qualified ELynx.Data.Sequence.Alignment as M
import qualified ELynx.Data.Sequence.Sequence as S
import ELynx.Export.Sequence.Fasta
import ELynx.Tools
import SLynx.Filter.Options
import SLynx.Tools
filterRows :: Maybe Int -> Maybe Int -> Bool -> [S.Sequence] -> BL.ByteString
filterRows :: Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
ml Maybe Int
ms Bool
std [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString) -> [Sequence] -> ByteString
forall a b. (a -> b) -> a -> b
$ [[Sequence] -> [Sequence]] -> [Sequence] -> [Sequence]
forall a. [a -> a] -> a -> a
compose [[Sequence] -> [Sequence]]
filters [Sequence]
ss
where
filters' :: [[Sequence] -> [Sequence]]
filters' =
(Maybe ([Sequence] -> [Sequence]) -> [Sequence] -> [Sequence])
-> [Maybe ([Sequence] -> [Sequence])] -> [[Sequence] -> [Sequence]]
forall a b. (a -> b) -> [a] -> [b]
map (([Sequence] -> [Sequence])
-> Maybe ([Sequence] -> [Sequence]) -> [Sequence] -> [Sequence]
forall a. a -> Maybe a -> a
fromMaybe [Sequence] -> [Sequence]
forall a. a -> a
id) [Int -> [Sequence] -> [Sequence]
S.filterLongerThan (Int -> [Sequence] -> [Sequence])
-> Maybe Int -> Maybe ([Sequence] -> [Sequence])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ml, Int -> [Sequence] -> [Sequence]
S.filterShorterThan (Int -> [Sequence] -> [Sequence])
-> Maybe Int -> Maybe ([Sequence] -> [Sequence])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Int
ms]
filters :: [[Sequence] -> [Sequence]]
filters = if Bool
std then [Sequence] -> [Sequence]
S.filterStandard ([Sequence] -> [Sequence])
-> [[Sequence] -> [Sequence]] -> [[Sequence] -> [Sequence]]
forall a. a -> [a] -> [a]
: [[Sequence] -> [Sequence]]
filters' else [[Sequence] -> [Sequence]]
filters'
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd :: ELynx FilterRowsArguments ()
filterRowsCmd = do
(FilterRowsArguments Alphabet
al FilePath
inFile Maybe Int
long Maybe Int
short Bool
std) <- Environment FilterRowsArguments -> FilterRowsArguments
forall a. Environment a -> a
localArguments (Environment FilterRowsArguments -> FilterRowsArguments)
-> ReaderT
(Environment FilterRowsArguments)
IO
(Environment FilterRowsArguments)
-> ReaderT (Environment FilterRowsArguments) IO FilterRowsArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
(Environment FilterRowsArguments)
IO
(Environment FilterRowsArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
ELynx FilterRowsArguments ()
-> (Int -> ELynx FilterRowsArguments ())
-> Maybe Int
-> ELynx FilterRowsArguments ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
(() -> ELynx FilterRowsArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
( \Int
val ->
FilePath -> ELynx FilterRowsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx FilterRowsArguments ())
-> FilePath -> ELynx FilterRowsArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath
" Keep sequences longer than " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
val FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
"."
)
Maybe Int
long
ELynx FilterRowsArguments ()
-> (Int -> ELynx FilterRowsArguments ())
-> Maybe Int
-> ELynx FilterRowsArguments ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
(() -> ELynx FilterRowsArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
( \Int
val ->
FilePath -> ELynx FilterRowsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx FilterRowsArguments ())
-> FilePath -> ELynx FilterRowsArguments ()
forall a b. (a -> b) -> a -> b
$ FilePath
" Keep sequences shorter than " FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> Int -> FilePath
forall a. Show a => a -> FilePath
show Int
val FilePath -> FilePath -> FilePath
forall a. Semigroup a => a -> a -> a
<> FilePath
"."
)
Maybe Int
short
Bool
-> ELynx FilterRowsArguments () -> ELynx FilterRowsArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
std (ELynx FilterRowsArguments () -> ELynx FilterRowsArguments ())
-> ELynx FilterRowsArguments () -> ELynx FilterRowsArguments ()
forall a b. (a -> b) -> a -> b
$
FilePath -> ELynx FilterRowsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS
FilePath
" Keep sequences containing at least one standard (i.e., non-IUPAC) character."
[Sequence]
ss <- Alphabet
-> FilePath -> Logger (Environment FilterRowsArguments) [Sequence]
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> FilePath -> Logger e [Sequence]
readSeqs Alphabet
al FilePath
inFile
let result :: ByteString
result = Maybe Int -> Maybe Int -> Bool -> [Sequence] -> ByteString
filterRows Maybe Int
long Maybe Int
short Bool
std [Sequence]
ss
FilePath -> ByteString -> FilePath -> ELynx FilterRowsArguments ()
forall a.
Reproducible a =>
FilePath -> ByteString -> FilePath -> ELynx a ()
out FilePath
"filtered sequences" ByteString
result FilePath
".fasta"
filterCols :: Maybe Double -> [S.Sequence] -> BL.ByteString
filterCols :: Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
ms [Sequence]
ss = [Sequence] -> ByteString
sequencesToFasta ([Sequence] -> ByteString)
-> (Alignment -> [Sequence]) -> Alignment -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Alignment -> [Sequence]
M.toSequences (Alignment -> ByteString) -> Alignment -> ByteString
forall a b. (a -> b) -> a -> b
$ [Alignment -> Alignment] -> Alignment -> Alignment
forall a. [a -> a] -> a -> a
compose [Alignment -> Alignment]
filters Alignment
a
where
a :: Alignment
a = (FilePath -> Alignment)
-> (Alignment -> Alignment)
-> Either FilePath Alignment
-> Alignment
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either FilePath -> Alignment
forall a. HasCallStack => FilePath -> a
error Alignment -> Alignment
forall a. a -> a
id ([Sequence] -> Either FilePath Alignment
M.fromSequences [Sequence]
ss)
filters :: [Alignment -> Alignment]
filters = (Maybe (Alignment -> Alignment) -> Alignment -> Alignment)
-> [Maybe (Alignment -> Alignment)] -> [Alignment -> Alignment]
forall a b. (a -> b) -> [a] -> [b]
map ((Alignment -> Alignment)
-> Maybe (Alignment -> Alignment) -> Alignment -> Alignment
forall a. a -> Maybe a -> a
fromMaybe Alignment -> Alignment
forall a. a -> a
id) [Double -> Alignment -> Alignment
M.filterColsStd (Double -> Alignment -> Alignment)
-> Maybe Double -> Maybe (Alignment -> Alignment)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Double
ms]
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd :: ELynx FilterColsArguments ()
filterColsCmd = do
(FilterColsArguments Alphabet
al FilePath
inFile Maybe Double
standard) <- Environment FilterColsArguments -> FilterColsArguments
forall a. Environment a -> a
localArguments (Environment FilterColsArguments -> FilterColsArguments)
-> ReaderT
(Environment FilterColsArguments)
IO
(Environment FilterColsArguments)
-> ReaderT (Environment FilterColsArguments) IO FilterColsArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
(Environment FilterColsArguments)
IO
(Environment FilterColsArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
case Maybe Double
standard of
Maybe Double
Nothing -> () -> ELynx FilterColsArguments ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just Double
p ->
FilePath -> ELynx FilterColsArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
FilePath -> Logger e ()
logInfoS (FilePath -> ELynx FilterColsArguments ())
-> FilePath -> ELynx FilterColsArguments ()
forall a b. (a -> b) -> a -> b
$
FilePath
" Keep columns with a proportion of standard (non-IUPAC) characters larger than "
FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Double -> FilePath
forall a. Show a => a -> FilePath
show Double
p
FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ FilePath
"."
[Sequence]
ss <- Alphabet
-> FilePath -> Logger (Environment FilterColsArguments) [Sequence]
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> FilePath -> Logger e [Sequence]
readSeqs Alphabet
al FilePath
inFile
let result :: ByteString
result = Maybe Double -> [Sequence] -> ByteString
filterCols Maybe Double
standard [Sequence]
ss
FilePath -> ByteString -> FilePath -> ELynx FilterColsArguments ()
forall a.
Reproducible a =>
FilePath -> ByteString -> FilePath -> ELynx a ()
out FilePath
"filtered sequences" ByteString
result FilePath
".fasta"