module ELynx.Data.MarkovProcess.AminoAcid
(
lg,
lgCustom,
wag,
wagCustom,
poisson,
poissonCustom,
gtr20,
alphaToPamlVec,
pamlToAlphaVec,
)
where
import Data.ByteString.Internal (c2w)
import Data.Maybe (fromMaybe)
import qualified Data.Vector.Storable as V
import Data.Word (Word8)
import ELynx.Data.Alphabet.Alphabet
import ELynx.Data.MarkovProcess.RateMatrix
import ELynx.Data.MarkovProcess.SubstitutionModel
import Numeric.LinearAlgebra
n :: Int
n :: Int
n = Int
20
aaAlphaOrder :: V.Vector Word8
aaAlphaOrder :: Vector Word8
aaAlphaOrder =
(Char -> Word8) -> Vector Char -> Vector Word8
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
V.map
Char -> Word8
c2w
(Vector Char -> Vector Word8) -> Vector Char -> Vector Word8
forall a b. (a -> b) -> a -> b
$ [Char] -> Vector Char
forall a. Storable a => [a] -> Vector a
V.fromList
[ Char
'A',
Char
'C',
Char
'D',
Char
'E',
Char
'F',
Char
'G',
Char
'H',
Char
'I',
Char
'K',
Char
'L',
Char
'M',
Char
'N',
Char
'P',
Char
'Q',
Char
'R',
Char
'S',
Char
'T',
Char
'V',
Char
'W',
Char
'Y'
]
aaPamlOrder :: V.Vector Word8
aaPamlOrder :: Vector Word8
aaPamlOrder =
(Char -> Word8) -> Vector Char -> Vector Word8
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
V.map
Char -> Word8
c2w
(Vector Char -> Vector Word8) -> Vector Char -> Vector Word8
forall a b. (a -> b) -> a -> b
$ [Char] -> Vector Char
forall a. Storable a => [a] -> Vector a
V.fromList
[ Char
'A',
Char
'R',
Char
'N',
Char
'D',
Char
'C',
Char
'Q',
Char
'E',
Char
'G',
Char
'H',
Char
'I',
Char
'L',
Char
'K',
Char
'M',
Char
'F',
Char
'P',
Char
'S',
Char
'T',
Char
'W',
Char
'Y',
Char
'V'
]
alphaIndexToPamlIndex :: Int -> Int
alphaIndexToPamlIndex :: Int -> Int
alphaIndexToPamlIndex Int
i =
Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe
([Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ [Char]
"Could not convert index " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
".")
(Word8 -> Vector Word8 -> Maybe Int
forall a. (Storable a, Eq a) => a -> Vector a -> Maybe Int
V.elemIndex Word8
aa Vector Word8
aaPamlOrder)
where
aa :: Word8
aa = Vector Word8
aaAlphaOrder Vector Word8 -> Int -> Word8
forall a. Storable a => Vector a -> Int -> a
V.! Int
i
pamlIndexToAlphaIndex :: Int -> Int
pamlIndexToAlphaIndex :: Int -> Int
pamlIndexToAlphaIndex Int
i =
Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe
([Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ [Char]
"Could not convert index " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
".")
(Word8 -> Vector Word8 -> Maybe Int
forall a. (Storable a, Eq a) => a -> Vector a -> Maybe Int
V.elemIndex Word8
aa Vector Word8
aaAlphaOrder)
where
aa :: Word8
aa = Vector Word8
aaPamlOrder Vector Word8 -> Int -> Word8
forall a. Storable a => Vector a -> Int -> a
V.! Int
i
pamlToAlphaVec :: Vector R -> Vector R
pamlToAlphaVec :: Vector R -> Vector R
pamlToAlphaVec Vector R
v = Int -> (R -> R) -> Vector R
forall d f (c :: * -> *) e. Build d f c e => d -> f -> c e
build Int
n (\R
i -> Vector R
v Vector R -> Int -> R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
alphaIndexToPamlIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
i))
alphaToPamlVec :: Vector R -> Vector R
alphaToPamlVec :: Vector R -> Vector R
alphaToPamlVec Vector R
v = Int -> (R -> R) -> Vector R
forall d f (c :: * -> *) e. Build d f c e => d -> f -> c e
build Int
n (\R
i -> Vector R
v Vector R -> Int -> R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
pamlIndexToAlphaIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
i))
pamlToAlphaMat :: Matrix R -> Matrix R
pamlToAlphaMat :: Matrix R -> Matrix R
pamlToAlphaMat Matrix R
m =
(Int, Int) -> (R -> R -> R) -> Matrix R
forall d f (c :: * -> *) e. Build d f c e => d -> f -> c e
build
(Int
n, Int
n)
( \R
i R
j -> Matrix R
m Matrix R -> Int -> Vector R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
alphaIndexToPamlIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
i) Vector R -> Int -> R
forall c t. Indexable c t => c -> Int -> t
! Int -> Int
alphaIndexToPamlIndex (R -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round R
j)
)
lgExchRawPaml :: [Double]
lgExchRawPaml :: [R]
lgExchRawPaml =
[ R
0.425093,
R
0.276818,
R
0.751878,
R
0.395144,
R
0.123954,
R
5.076149,
R
2.489084,
R
0.534551,
R
0.528768,
R
0.062556,
R
0.969894,
R
2.807908,
R
1.695752,
R
0.523386,
R
0.084808,
R
1.038545,
R
0.363970,
R
0.541712,
R
5.243870,
R
0.003499,
R
4.128591,
R
2.066040,
R
0.390192,
R
1.437645,
R
0.844926,
R
0.569265,
R
0.267959,
R
0.348847,
R
0.358858,
R
2.426601,
R
4.509238,
R
0.927114,
R
0.640543,
R
4.813505,
R
0.423881,
R
0.311484,
R
0.149830,
R
0.126991,
R
0.191503,
R
0.010690,
R
0.320627,
R
0.072854,
R
0.044265,
R
0.008705,
R
0.108882,
R
0.395337,
R
0.301848,
R
0.068427,
R
0.015076,
R
0.594007,
R
0.582457,
R
0.069673,
R
0.044261,
R
0.366317,
R
4.145067,
R
0.536518,
R
6.326067,
R
2.145078,
R
0.282959,
R
0.013266,
R
3.234294,
R
1.807177,
R
0.296636,
R
0.697264,
R
0.159069,
R
0.137500,
R
1.124035,
R
0.484133,
R
0.371004,
R
0.025548,
R
0.893680,
R
1.672569,
R
0.173735,
R
0.139538,
R
0.442472,
R
4.273607,
R
6.312358,
R
0.656604,
R
0.253701,
R
0.052722,
R
0.089525,
R
0.017416,
R
1.105251,
R
0.035855,
R
0.018811,
R
0.089586,
R
0.682139,
R
1.112727,
R
2.592692,
R
0.023918,
R
1.798853,
R
1.177651,
R
0.332533,
R
0.161787,
R
0.394456,
R
0.075382,
R
0.624294,
R
0.419409,
R
0.196961,
R
0.508851,
R
0.078281,
R
0.249060,
R
0.390322,
R
0.099849,
R
0.094464,
R
4.727182,
R
0.858151,
R
4.008358,
R
1.240275,
R
2.784478,
R
1.223828,
R
0.611973,
R
1.739990,
R
0.990012,
R
0.064105,
R
0.182287,
R
0.748683,
R
0.346960,
R
0.361819,
R
1.338132,
R
2.139501,
R
0.578987,
R
2.000679,
R
0.425860,
R
1.143480,
R
1.080136,
R
0.604545,
R
0.129836,
R
0.584262,
R
1.033739,
R
0.302936,
R
1.136863,
R
2.020366,
R
0.165001,
R
0.571468,
R
6.472279,
R
0.180717,
R
0.593607,
R
0.045376,
R
0.029890,
R
0.670128,
R
0.236199,
R
0.077852,
R
0.268491,
R
0.597054,
R
0.111660,
R
0.619632,
R
0.049906,
R
0.696175,
R
2.457121,
R
0.095131,
R
0.248862,
R
0.140825,
R
0.218959,
R
0.314440,
R
0.612025,
R
0.135107,
R
1.165532,
R
0.257336,
R
0.120037,
R
0.054679,
R
5.306834,
R
0.232523,
R
0.299648,
R
0.131932,
R
0.481306,
R
7.803902,
R
0.089613,
R
0.400547,
R
0.245841,
R
3.151815,
R
2.547870,
R
0.170887,
R
0.083688,
R
0.037967,
R
1.959291,
R
0.210332,
R
0.245034,
R
0.076701,
R
0.119013,
R
10.649107,
R
1.702745,
R
0.185202,
R
1.898718,
R
0.654683,
R
0.296501,
R
0.098369,
R
2.188158,
R
0.189510,
R
0.249313
]
lgExch :: ExchangeabilityMatrix
lgExch :: Matrix R
lgExch = Matrix R -> Matrix R
pamlToAlphaMat (Matrix R -> Matrix R) -> Matrix R -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> [R] -> Matrix R
forall a.
(RealFrac a, Container Vector a) =>
Int -> [a] -> Matrix a
exchFromListLower Int
n [R]
lgExchRawPaml
normalizeSumVec :: Vector Double -> Vector Double
normalizeSumVec :: Vector R -> Vector R
normalizeSumVec Vector R
v = (R -> R) -> Vector R -> Vector R
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
V.map (R -> R -> R
forall a. Fractional a => a -> a -> a
/ R
s) Vector R
v
where
s :: R
s = Vector R -> R
forall a. (Storable a, Num a) => Vector a -> a
V.sum Vector R
v
{-# INLINE normalizeSumVec #-}
lgStatDistPaml :: StationaryDistribution
lgStatDistPaml :: Vector R
lgStatDistPaml =
Vector R -> Vector R
normalizeSumVec (Vector R -> Vector R) -> Vector R -> Vector R
forall a b. (a -> b) -> a -> b
$
[R] -> Vector R
forall a. Storable a => [a] -> Vector a
fromList
[ R
0.079066,
R
0.055941,
R
0.041977,
R
0.053052,
R
0.012937,
R
0.040767,
R
0.071586,
R
0.057337,
R
0.022355,
R
0.062157,
R
0.099081,
R
0.064600,
R
0.022951,
R
0.042302,
R
0.044040,
R
0.061197,
R
0.053287,
R
0.012066,
R
0.034155,
R
0.069147
]
lgStatDist :: StationaryDistribution
lgStatDist :: Vector R
lgStatDist = Vector R -> Vector R
pamlToAlphaVec Vector R
lgStatDistPaml
lg :: SubstitutionModel
lg :: SubstitutionModel
lg = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"LG" [] Vector R
lgStatDist Matrix R
lgExch
lgCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
lgCustom :: Maybe [Char] -> Vector R -> SubstitutionModel
lgCustom Maybe [Char]
mnm Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
nm [] Vector R
d Matrix R
lgExch
where
nm :: [Char]
nm = [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
"LG-Custom" Maybe [Char]
mnm
wagExchRawPaml :: [Double]
wagExchRawPaml :: [R]
wagExchRawPaml =
[ R
55.15710,
R
50.98480,
R
63.53460,
R
73.89980,
R
14.73040,
R
542.94200,
R
102.70400,
R
52.81910,
R
26.52560,
R
3.02949,
R
90.85980,
R
303.55000,
R
154.36400,
R
61.67830,
R
9.88179,
R
158.28500,
R
43.91570,
R
94.71980,
R
617.41600,
R
2.13520,
R
546.94700,
R
141.67200,
R
58.46650,
R
112.55600,
R
86.55840,
R
30.66740,
R
33.00520,
R
56.77170,
R
31.69540,
R
213.71500,
R
395.62900,
R
93.06760,
R
24.89720,
R
429.41100,
R
57.00250,
R
24.94100,
R
19.33350,
R
18.69790,
R
55.42360,
R
3.94370,
R
17.01350,
R
11.39170,
R
12.73950,
R
3.04501,
R
13.81900,
R
39.79150,
R
49.76710,
R
13.15280,
R
8.48047,
R
38.42870,
R
86.94890,
R
15.42630,
R
6.13037,
R
49.94620,
R
317.09700,
R
90.62650,
R
535.14200,
R
301.20100,
R
47.98550,
R
7.40339,
R
389.49000,
R
258.44300,
R
37.35580,
R
89.04320,
R
32.38320,
R
25.75550,
R
89.34960,
R
68.31620,
R
19.82210,
R
10.37540,
R
39.04820,
R
154.52600,
R
31.51240,
R
17.41000,
R
40.41410,
R
425.74600,
R
485.40200,
R
93.42760,
R
21.04940,
R
10.27110,
R
9.61621,
R
4.67304,
R
39.80200,
R
9.99208,
R
8.11339,
R
4.99310,
R
67.93710,
R
105.94700,
R
211.51700,
R
8.88360,
R
119.06300,
R
143.85500,
R
67.94890,
R
19.50810,
R
42.39840,
R
10.94040,
R
93.33720,
R
68.23550,
R
24.35700,
R
69.61980,
R
9.99288,
R
41.58440,
R
55.68960,
R
17.13290,
R
16.14440,
R
337.07900,
R
122.41900,
R
397.42300,
R
107.17600,
R
140.76600,
R
102.88700,
R
70.49390,
R
134.18200,
R
74.01690,
R
31.94400,
R
34.47390,
R
96.71300,
R
49.39050,
R
54.59310,
R
161.32800,
R
212.11100,
R
55.44130,
R
203.00600,
R
37.48660,
R
51.29840,
R
85.79280,
R
82.27650,
R
22.58330,
R
47.33070,
R
145.81600,
R
32.66220,
R
138.69800,
R
151.61200,
R
17.19030,
R
79.53840,
R
437.80200,
R
11.31330,
R
116.39200,
R
7.19167,
R
12.97670,
R
71.70700,
R
21.57370,
R
15.65570,
R
33.69830,
R
26.25690,
R
21.24830,
R
66.53090,
R
13.75050,
R
51.57060,
R
152.96400,
R
13.94050,
R
52.37420,
R
11.08640,
R
24.07350,
R
38.15330,
R
108.60000,
R
32.57110,
R
54.38330,
R
22.77100,
R
19.63030,
R
10.36040,
R
387.34400,
R
42.01700,
R
39.86180,
R
13.32640,
R
42.84370,
R
645.42800,
R
21.60460,
R
78.69930,
R
29.11480,
R
248.53900,
R
200.60100,
R
25.18490,
R
19.62460,
R
15.23350,
R
100.21400,
R
30.12810,
R
58.87310,
R
18.72470,
R
11.83580,
R
782.13000,
R
180.03400,
R
30.54340,
R
205.84500,
R
64.98920,
R
31.48870,
R
23.27390,
R
138.82300,
R
36.53690,
R
31.47300
]
wagExch :: ExchangeabilityMatrix
wagExch :: Matrix R
wagExch = Matrix R -> Matrix R
pamlToAlphaMat (Matrix R -> Matrix R) -> Matrix R -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> [R] -> Matrix R
forall a.
(RealFrac a, Container Vector a) =>
Int -> [a] -> Matrix a
exchFromListLower Int
n [R]
wagExchRawPaml
wagStatDistPaml :: StationaryDistribution
wagStatDistPaml :: Vector R
wagStatDistPaml =
Vector R -> Vector R
normalizeSumVec (Vector R -> Vector R) -> Vector R -> Vector R
forall a b. (a -> b) -> a -> b
$
[R] -> Vector R
forall a. Storable a => [a] -> Vector a
fromList
[ R
0.0866279,
R
0.043972,
R
0.0390894,
R
0.0570451,
R
0.0193078,
R
0.0367281,
R
0.0580589,
R
0.0832518,
R
0.0244313,
R
0.048466,
R
0.086209,
R
0.0620286,
R
0.0195027,
R
0.0384319,
R
0.0457631,
R
0.0695179,
R
0.0610127,
R
0.0143859,
R
0.0352742,
R
0.0708957
]
wagStatDist :: StationaryDistribution
wagStatDist :: Vector R
wagStatDist = Vector R -> Vector R
pamlToAlphaVec Vector R
wagStatDistPaml
wag :: SubstitutionModel
wag :: SubstitutionModel
wag = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"WAG" [] Vector R
wagStatDist Matrix R
wagExch
wagCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
wagCustom :: Maybe [Char] -> Vector R -> SubstitutionModel
wagCustom Maybe [Char]
mnm Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
nm [] Vector R
d Matrix R
wagExch
where
nm :: [Char]
nm = [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
"WAG-Custom" Maybe [Char]
mnm
matrixSetDiagToZero :: Matrix R -> Matrix R
matrixSetDiagToZero :: Matrix R -> Matrix R
matrixSetDiagToZero Matrix R
m = Matrix R
m Matrix R -> Matrix R -> Matrix R
forall a. Num a => a -> a -> a
- Vector R -> Matrix R
forall a. (Num a, Element a) => Vector a -> Matrix a
diag (Matrix R -> Vector R
forall t. Element t => Matrix t -> Vector t
takeDiag Matrix R
m)
{-# INLINE matrixSetDiagToZero #-}
uniformExch :: ExchangeabilityMatrix
uniformExch :: Matrix R
uniformExch = Matrix R -> Matrix R
matrixSetDiagToZero (Matrix R -> Matrix R) -> Matrix R -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> [R] -> Matrix R
matrix Int
n ([R] -> Matrix R) -> [R] -> Matrix R
forall a b. (a -> b) -> a -> b
$ Int -> R -> [R]
forall a. Int -> a -> [a]
replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
n) R
1.0
poissonExch :: ExchangeabilityMatrix
poissonExch :: Matrix R
poissonExch = Matrix R
uniformExch
uniformVec :: Vector Double
uniformVec :: Vector R
uniformVec = Int -> R -> Vector R
forall a. Storable a => Int -> a -> Vector a
V.replicate Int
n (R
1 R -> R -> R
forall a. Fractional a => a -> a -> a
/ Int -> R
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)
poisson :: SubstitutionModel
poisson :: SubstitutionModel
poisson = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"Poisson" [] Vector R
uniformVec Matrix R
poissonExch
poissonCustom :: Maybe String -> StationaryDistribution -> SubstitutionModel
poissonCustom :: Maybe [Char] -> Vector R -> SubstitutionModel
poissonCustom Maybe [Char]
mnm Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
nm [] Vector R
d Matrix R
poissonExch
where
nm :: [Char]
nm = [Char] -> Maybe [Char] -> [Char]
forall a. a -> Maybe a -> a
fromMaybe [Char]
"Poisson-Custom" Maybe [Char]
mnm
gtr20 :: [Double] -> StationaryDistribution -> SubstitutionModel
gtr20 :: [R] -> Vector R -> SubstitutionModel
gtr20 [R]
es Vector R
d = Alphabet
-> [Char] -> [R] -> Vector R -> Matrix R -> SubstitutionModel
substitutionModel Alphabet
Protein [Char]
"GTR" [R]
es Vector R
d Matrix R
e
where
e :: Matrix R
e = Int -> [R] -> Matrix R
forall a.
(RealFrac a, Container Vector a) =>
Int -> [a] -> Matrix a
exchFromListUpper Int
n [R]
es