-- | This module provides functionality for translation between nucleotides
-- and amino acids.
--
-- NOTE 'aaDNAseq' is lossy. Might be a good idea to consider something
-- more involved?
--
-- TODO we need different functions, depending on if we have a part of
-- a genome in @DNA@ form, or some messenger @RNA@. It'll probably also be
-- useful to return @Either@, with @Left@ indicating error like partially
-- translated sequence due to intermediate stop codons, or so.
--
-- TODO 'dnaAAseq' and 'aaDNAseq' can be nicely optimized using 'flatten'
-- and friends.

module Biobase.Primary.Trans where

import           Control.Lens
import           Control.Arrow ((***))
import           Data.ByteString.Char8 (ByteString,unpack)
import           Data.Map.Strict (Map)
import           Data.Tuple (swap)
import qualified Data.Map.Strict as M
import qualified Data.Vector.Unboxed as VU

import           Biobase.Types.BioSequence
import           Biobase.Types.Codon

import           Biobase.Primary.AA
import           Biobase.Primary.Nuc
import           Biobase.Primary.Letter
import           Biobase.GeneticCodes.Translation
import           Biobase.GeneticCodes.Types



-- | Transform translation tables into the @Letter DNA/Letter AA@ format.

letterTranslationTable :: TranslationTable Char Char -> TranslationTable (Letter DNA n) (Letter AA n)
letterTranslationTable :: TranslationTable Char Char
-> TranslationTable (Letter DNA n) (Letter AA n)
letterTranslationTable TranslationTable Char Char
tbl = TranslationTable :: forall c a.
Map (Codon c) (TranslationElement c a)
-> Map a [TranslationElement c a]
-> Int
-> Text
-> TranslationTable c a
TranslationTable
  { _codonToAminoAcid :: Map
  (Codon (Letter DNA n))
  (TranslationElement (Letter DNA n) (Letter AA n))
_codonToAminoAcid  = [(Codon (Letter DNA n),
  TranslationElement (Letter DNA n) (Letter AA n))]
-> Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n))
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Codon (Letter DNA n),
   TranslationElement (Letter DNA n) (Letter AA n))]
 -> Map
      (Codon (Letter DNA n))
      (TranslationElement (Letter DNA n) (Letter AA n)))
-> (Map (Codon Char) (TranslationElement Char Char)
    -> [(Codon (Letter DNA n),
         TranslationElement (Letter DNA n) (Letter AA n))])
-> Map (Codon Char) (TranslationElement Char Char)
-> Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Codon Char, TranslationElement Char Char)
 -> (Codon (Letter DNA n),
     TranslationElement (Letter DNA n) (Letter AA n)))
-> [(Codon Char, TranslationElement Char Char)]
-> [(Codon (Letter DNA n),
     TranslationElement (Letter DNA n) (Letter AA n))]
forall a b. (a -> b) -> [a] -> [b]
map (Codon Char -> Codon (Letter DNA n)
forall k (n :: k). Codon Char -> Codon (Letter DNA n)
ftriplet (Codon Char -> Codon (Letter DNA n))
-> (TranslationElement Char Char
    -> TranslationElement (Letter DNA n) (Letter AA n))
-> (Codon Char, TranslationElement Char Char)
-> (Codon (Letter DNA n),
    TranslationElement (Letter DNA n) (Letter AA n))
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** TranslationElement Char Char
-> TranslationElement (Letter DNA n) (Letter AA n)
forall k (n :: k).
TranslationElement Char Char
-> TranslationElement (Letter DNA n) (Letter AA n)
felement) ([(Codon Char, TranslationElement Char Char)]
 -> [(Codon (Letter DNA n),
      TranslationElement (Letter DNA n) (Letter AA n))])
-> (Map (Codon Char) (TranslationElement Char Char)
    -> [(Codon Char, TranslationElement Char Char)])
-> Map (Codon Char) (TranslationElement Char Char)
-> [(Codon (Letter DNA n),
     TranslationElement (Letter DNA n) (Letter AA n))]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map (Codon Char) (TranslationElement Char Char)
-> [(Codon Char, TranslationElement Char Char)]
forall k a. Map k a -> [(k, a)]
M.toList (Map (Codon Char) (TranslationElement Char Char)
 -> Map
      (Codon (Letter DNA n))
      (TranslationElement (Letter DNA n) (Letter AA n)))
-> Map (Codon Char) (TranslationElement Char Char)
-> Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n))
forall a b. (a -> b) -> a -> b
$ TranslationTable Char Char
tblTranslationTable Char Char
-> Getting
     (Map (Codon Char) (TranslationElement Char Char))
     (TranslationTable Char Char)
     (Map (Codon Char) (TranslationElement Char Char))
-> Map (Codon Char) (TranslationElement Char Char)
forall s a. s -> Getting a s a -> a
^.Getting
  (Map (Codon Char) (TranslationElement Char Char))
  (TranslationTable Char Char)
  (Map (Codon Char) (TranslationElement Char Char))
forall c a.
Lens'
  (TranslationTable c a) (Map (Codon c) (TranslationElement c a))
codonToAminoAcid
  , _aminoAcidtoCodons :: Map (Letter AA n) [TranslationElement (Letter DNA n) (Letter AA n)]
_aminoAcidtoCodons = [(Letter AA n, [TranslationElement (Letter DNA n) (Letter AA n)])]
-> Map
     (Letter AA n) [TranslationElement (Letter DNA n) (Letter AA n)]
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Letter AA n, [TranslationElement (Letter DNA n) (Letter AA n)])]
 -> Map
      (Letter AA n) [TranslationElement (Letter DNA n) (Letter AA n)])
-> (Map Char [TranslationElement Char Char]
    -> [(Letter AA n,
         [TranslationElement (Letter DNA n) (Letter AA n)])])
-> Map Char [TranslationElement Char Char]
-> Map
     (Letter AA n) [TranslationElement (Letter DNA n) (Letter AA n)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, [TranslationElement Char Char])
 -> (Letter AA n,
     [TranslationElement (Letter DNA n) (Letter AA n)]))
-> [(Char, [TranslationElement Char Char])]
-> [(Letter AA n,
     [TranslationElement (Letter DNA n) (Letter AA n)])]
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Letter AA n
forall k (n :: k). Char -> Letter AA n
charAA (Char -> Letter AA n)
-> ([TranslationElement Char Char]
    -> [TranslationElement (Letter DNA n) (Letter AA n)])
-> (Char, [TranslationElement Char Char])
-> (Letter AA n, [TranslationElement (Letter DNA n) (Letter AA n)])
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** (TranslationElement Char Char
 -> TranslationElement (Letter DNA n) (Letter AA n))
-> [TranslationElement Char Char]
-> [TranslationElement (Letter DNA n) (Letter AA n)]
forall a b. (a -> b) -> [a] -> [b]
map TranslationElement Char Char
-> TranslationElement (Letter DNA n) (Letter AA n)
forall k (n :: k).
TranslationElement Char Char
-> TranslationElement (Letter DNA n) (Letter AA n)
felement) ([(Char, [TranslationElement Char Char])]
 -> [(Letter AA n,
      [TranslationElement (Letter DNA n) (Letter AA n)])])
-> (Map Char [TranslationElement Char Char]
    -> [(Char, [TranslationElement Char Char])])
-> Map Char [TranslationElement Char Char]
-> [(Letter AA n,
     [TranslationElement (Letter DNA n) (Letter AA n)])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Char [TranslationElement Char Char]
-> [(Char, [TranslationElement Char Char])]
forall k a. Map k a -> [(k, a)]
M.toList (Map Char [TranslationElement Char Char]
 -> Map
      (Letter AA n) [TranslationElement (Letter DNA n) (Letter AA n)])
-> Map Char [TranslationElement Char Char]
-> Map
     (Letter AA n) [TranslationElement (Letter DNA n) (Letter AA n)]
forall a b. (a -> b) -> a -> b
$ TranslationTable Char Char
tblTranslationTable Char Char
-> Getting
     (Map Char [TranslationElement Char Char])
     (TranslationTable Char Char)
     (Map Char [TranslationElement Char Char])
-> Map Char [TranslationElement Char Char]
forall s a. s -> Getting a s a -> a
^.Getting
  (Map Char [TranslationElement Char Char])
  (TranslationTable Char Char)
  (Map Char [TranslationElement Char Char])
forall c a.
Lens' (TranslationTable c a) (Map a [TranslationElement c a])
aminoAcidtoCodons
  , _tableID :: Int
_tableID           = TranslationTable Char Char
tblTranslationTable Char Char
-> Getting Int (TranslationTable Char Char) Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int (TranslationTable Char Char) Int
forall c a. Lens' (TranslationTable c a) Int
tableID
  , _tableName :: Text
_tableName         = TranslationTable Char Char
tblTranslationTable Char Char
-> Getting Text (TranslationTable Char Char) Text -> Text
forall s a. s -> Getting a s a -> a
^.Getting Text (TranslationTable Char Char) Text
forall c a. Lens' (TranslationTable c a) Text
tableName
  } where ftriplet :: Codon Char -> Codon (Letter DNA n)
          ftriplet :: Codon Char -> Codon (Letter DNA n)
ftriplet = ASetter (Codon Char) (Codon (Letter DNA n)) Char (Letter DNA n)
-> (Char -> Letter DNA n) -> Codon Char -> Codon (Letter DNA n)
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
over ASetter (Codon Char) (Codon (Letter DNA n)) Char (Letter DNA n)
forall s t a b. Each s t a b => Traversal s t a b
each Char -> Letter DNA n
forall k (n :: k). Char -> Letter DNA n
charDNA
          felement :: TranslationElement Char Char -> TranslationElement (Letter DNA n) (Letter AA n)
          felement :: TranslationElement Char Char
-> TranslationElement (Letter DNA n) (Letter AA n)
felement = ASetter
  (TranslationElement Char (Letter AA n))
  (TranslationElement (Letter DNA n) (Letter AA n))
  Char
  (Letter DNA n)
-> (Char -> Letter DNA n)
-> TranslationElement Char (Letter AA n)
-> TranslationElement (Letter DNA n) (Letter AA n)
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
over ((Codon Char -> Identity (Codon (Letter DNA n)))
-> TranslationElement Char (Letter AA n)
-> Identity (TranslationElement (Letter DNA n) (Letter AA n))
forall c1 a c2.
Lens
  (TranslationElement c1 a)
  (TranslationElement c2 a)
  (Codon c1)
  (Codon c2)
baseCodon((Codon Char -> Identity (Codon (Letter DNA n)))
 -> TranslationElement Char (Letter AA n)
 -> Identity (TranslationElement (Letter DNA n) (Letter AA n)))
-> ((Char -> Identity (Letter DNA n))
    -> Codon Char -> Identity (Codon (Letter DNA n)))
-> ASetter
     (TranslationElement Char (Letter AA n))
     (TranslationElement (Letter DNA n) (Letter AA n))
     Char
     (Letter DNA n)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.(Char -> Identity (Letter DNA n))
-> Codon Char -> Identity (Codon (Letter DNA n))
forall s t a b. Each s t a b => Traversal s t a b
each) Char -> Letter DNA n
forall k (n :: k). Char -> Letter DNA n
charDNA (TranslationElement Char (Letter AA n)
 -> TranslationElement (Letter DNA n) (Letter AA n))
-> (TranslationElement Char Char
    -> TranslationElement Char (Letter AA n))
-> TranslationElement Char Char
-> TranslationElement (Letter DNA n) (Letter AA n)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ASetter
  (TranslationElement Char Char)
  (TranslationElement Char (Letter AA n))
  Char
  (Letter AA n)
-> (Char -> Letter AA n)
-> TranslationElement Char Char
-> TranslationElement Char (Letter AA n)
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
over ASetter
  (TranslationElement Char Char)
  (TranslationElement Char (Letter AA n))
  Char
  (Letter AA n)
forall c a1 a2.
Lens (TranslationElement c a1) (TranslationElement c a2) a1 a2
aminoAcid Char -> Letter AA n
forall k (n :: k). Char -> Letter AA n
charAA

instance Translation (Codon (Letter DNA n)) where
  type TargetType (Codon (Letter DNA n)) = Letter AA n
  type CodonType (Codon (Letter DNA n)) = Letter DNA n
  type AAType (Codon (Letter DNA n)) = Letter AA n
  translate :: TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
translate TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
tbl Codon (Letter DNA n)
t = Letter AA n
-> (TranslationElement (Letter DNA n) (Letter AA n) -> Letter AA n)
-> Maybe (TranslationElement (Letter DNA n) (Letter AA n))
-> Letter AA n
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Letter AA n
forall k (n :: k). Letter AA n
Unknown TranslationElement (Letter DNA n) (Letter AA n) -> Letter AA n
forall c a. TranslationElement c a -> a
_aminoAcid (Maybe (TranslationElement (Letter DNA n) (Letter AA n))
 -> Letter AA n)
-> Maybe (TranslationElement (Letter DNA n) (Letter AA n))
-> Letter AA n
forall a b. (a -> b) -> a -> b
$ Codon (Letter DNA n)
-> Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n))
-> Maybe (TranslationElement (Letter DNA n) (Letter AA n))
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Codon (Letter DNA n)
t (TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
TranslationTable (Letter DNA n) (Letter AA n)
tblTranslationTable (Letter DNA n) (Letter AA n)
-> Getting
     (Map
        (Codon (Letter DNA n))
        (TranslationElement (Letter DNA n) (Letter AA n)))
     (TranslationTable (Letter DNA n) (Letter AA n))
     (Map
        (Codon (Letter DNA n))
        (TranslationElement (Letter DNA n) (Letter AA n)))
-> Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n))
forall s a. s -> Getting a s a -> a
^.Getting
  (Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n)))
  (TranslationTable (Letter DNA n) (Letter AA n))
  (Map
     (Codon (Letter DNA n))
     (TranslationElement (Letter DNA n) (Letter AA n)))
forall c a.
Lens'
  (TranslationTable c a) (Map (Codon c) (TranslationElement c a))
codonToAminoAcid)
  {-# Inline translate #-}
  translateAllFrames :: TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
translateAllFrames = TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate
  {-# Inline translateAllFrames #-}

instance Translation (Primary DNA n) where
  type TargetType (Primary DNA n) = Primary AA n
  type CodonType (Primary DNA n) = Letter DNA n
  type AAType (Primary DNA n) = Letter AA n
  -- |
  --
  -- TODO we could consider returning @Nothing@ in case the input is not
  -- power-of-three.
  translate :: TranslationTable
  (CodonType (Primary DNA n)) (AAType (Primary DNA n))
-> Primary DNA n -> TargetType (Primary DNA n)
translate TranslationTable
  (CodonType (Primary DNA n)) (AAType (Primary DNA n))
tbl Primary DNA n
xs = Int
-> (Primary DNA n -> Maybe (Letter AA n, Primary DNA n))
-> Primary DNA n
-> Vector (Letter AA n)
forall a b. Unbox a => Int -> (b -> Maybe (a, b)) -> b -> Vector a
VU.unfoldrN (Primary DNA n -> Int
forall a. Unbox a => Vector a -> Int
VU.length Primary DNA n
xs Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
3) Primary DNA n -> Maybe (Letter AA n, Primary DNA n)
go Primary DNA n
xs
    where go :: Primary DNA n -> Maybe (Letter AA n, Primary DNA n)
go (Int -> Primary DNA n -> (Primary DNA n, Primary DNA n)
forall a. Unbox a => Int -> Vector a -> (Vector a, Vector a)
VU.splitAt Int
3 -> (Primary DNA n
hs,Primary DNA n
ts))
            | Primary DNA n -> Int
forall a. Unbox a => Vector a -> Int
VU.length Primary DNA n
hs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
3 = Maybe (Letter AA n, Primary DNA n)
forall a. Maybe a
Nothing
            | Bool
otherwise        = (Letter AA n, Primary DNA n) -> Maybe (Letter AA n, Primary DNA n)
forall a. a -> Maybe a
Just (TargetType (Codon (Letter DNA n))
Letter AA n
aa,Primary DNA n
ts)
            where [Letter DNA n
a,Letter DNA n
b,Letter DNA n
c] = Primary DNA n -> [Letter DNA n]
forall a. Unbox a => Vector a -> [a]
VU.toList Primary DNA n
hs
                  aa :: TargetType (Codon (Letter DNA n))
aa      = TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate TranslationTable
  (CodonType (Primary DNA n)) (AAType (Primary DNA n))
TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
tbl (Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
forall a b. (a -> b) -> a -> b
$ Letter DNA n
-> Letter DNA n -> Letter DNA n -> Codon (Letter DNA n)
forall c. c -> c -> c -> Codon c
Codon Letter DNA n
a Letter DNA n
b Letter DNA n
c
  {-# Inline translate #-}
  translateAllFrames :: TranslationTable
  (CodonType (Primary DNA n)) (AAType (Primary DNA n))
-> Primary DNA n -> TargetType (Primary DNA n)
translateAllFrames TranslationTable
  (CodonType (Primary DNA n)) (AAType (Primary DNA n))
tbl Primary DNA n
xs = Int
-> (Int -> Maybe (Letter AA n, Int)) -> Int -> Vector (Letter AA n)
forall a b. Unbox a => Int -> (b -> Maybe (a, b)) -> b -> Vector a
VU.unfoldrN (Primary DNA n -> Int
forall a. Unbox a => Vector a -> Int
VU.length Primary DNA n
xs) Int -> Maybe (Letter AA n, Int)
go Int
0
    where go :: Int -> Maybe (Letter AA n, Int)
go Int
0 = (Letter AA n, Int) -> Maybe (Letter AA n, Int)
forall a. a -> Maybe a
Just (Letter AA n
forall k (n :: k). Letter AA n
Undef,Int
1)
          go Int
1 = (Letter AA n, Int) -> Maybe (Letter AA n, Int)
forall a. a -> Maybe a
Just (Letter AA n
forall k (n :: k). Letter AA n
Undef,Int
2)
          go Int
k = (Letter AA n, Int) -> Maybe (Letter AA n, Int)
forall a. a -> Maybe a
Just (TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
forall t.
Translation t =>
TranslationTable (CodonType t) (AAType t) -> t -> TargetType t
translate TranslationTable
  (CodonType (Primary DNA n)) (AAType (Primary DNA n))
TranslationTable
  (CodonType (Codon (Letter DNA n))) (AAType (Codon (Letter DNA n)))
tbl (Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n)))
-> Codon (Letter DNA n) -> TargetType (Codon (Letter DNA n))
forall a b. (a -> b) -> a -> b
$ Letter DNA n
-> Letter DNA n -> Letter DNA n -> Codon (Letter DNA n)
forall c. c -> c -> c -> Codon c
Codon (Primary DNA n
xs Primary DNA n -> Int -> Letter DNA n
forall a. Unbox a => Vector a -> Int -> a
VU.! (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2)) (Primary DNA n
xs Primary DNA n -> Int -> Letter DNA n
forall a. Unbox a => Vector a -> Int -> a
VU.! (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)) (Primary DNA n
xs Primary DNA n -> Int -> Letter DNA n
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
k), Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
  {-# Inlinable translateAllFrames #-}