-- Translation Module
-- By Gregory W. Schwartz

{- | Collects all functions pertaining to the translation of nucleotides to
amino acids for Text.
-}

{-# LANGUAGE OverloadedStrings #-}

module Data.Fasta.Text.Translation ( codon2aa
                                   , translate ) where

-- Built in
import Data.Either
import qualified Data.Text as T

-- Local
import Data.Fasta.Text.Types

-- | Converts a codon to an amino acid
-- Remember, if there is an "N" in that DNA sequence, then it is invalid
codon2aa :: Codon -> Either T.Text T.Text
codon2aa x
    | codon `elem` ["GCT", "GCC", "GCA", "GCG"]               = Right "A"
    | codon `elem` ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"] = Right "R"
    | codon `elem` ["AAT", "AAC"]                             = Right "N"
    | codon `elem` ["GAT", "GAC"]                             = Right "D"
    | codon `elem` ["TGT", "TGC"]                             = Right "C"
    | codon `elem` ["CAA", "CAG"]                             = Right "Q"
    | codon `elem` ["GAA", "GAG"]                             = Right "E"
    | codon `elem` ["GGT", "GGC", "GGA", "GGG"]               = Right "G"
    | codon `elem` ["CAT", "CAC"]                             = Right "H"
    | codon `elem` ["ATT", "ATC", "ATA"]                      = Right "I"
    | codon `elem` ["ATG"]                                    = Right "M"
    | codon `elem` ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"] = Right "L"
    | codon `elem` ["AAA", "AAG"]                             = Right "K"
    | codon `elem` ["TTT", "TTC"]                             = Right "F"
    | codon `elem` ["CCT", "CCC", "CCA", "CCG"]               = Right "P"
    | codon `elem` ["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"] = Right "S"
    | codon `elem` ["ACT", "ACC", "ACA", "ACG"]               = Right "T"
    | codon `elem` ["TGG"]                                    = Right "W"
    | codon `elem` ["TAT", "TAC"]                             = Right "Y"
    | codon `elem` ["GTT", "GTC", "GTA", "GTG"]               = Right "V"
    | codon `elem` ["TAA", "TGA", "TAG"]                      = Right "*"
    | codon `elem` ["---", "..."]                             = Right "-"
    | codon == "~~~"                                          = Right "-"
    | "N" `T.isInfixOf` codon                                 = Right "-"
    | "-" `T.isInfixOf` codon                                 = Right "-"
    | "." `T.isInfixOf` codon                                 = Right "-"
    | otherwise                                               = Left errorMsg
  where
    codon    = T.toUpper x
    errorMsg = T.append "Unidentified codon: " codon

-- | Translates a string of nucleotides. Returns a text with the error if the
-- codon is invalid.
translate :: Int -> FastaSequence -> Either T.Text FastaSequence
translate pos x
    | any isLeft' translation = Left $ head . lefts $ translation
    | otherwise               = Right $ x { fastaSeq = T.concat
                                                     . rights
                                                     $ translation }
  where
    translation = map codon2aa
                . filter ((== 3) . T.length)
                . T.chunksOf 3
                . T.drop (pos - 1)
                . fastaSeq
                $ x
    isLeft' (Left _) = True
    isLeft' _        = False