{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module TLynx.Distance.Distance
( distance,
)
where
import Control.Monad
( unless,
when,
)
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader hiding (local)
import Data.Bifunctor
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List hiding (intersect)
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Vector.Unboxed as V
import ELynx.Tools
import ELynx.Tree
import Statistics.Sample
import System.IO
import TLynx.Distance.Options
import TLynx.Parsers
import Text.Printf
median :: Ord a => [a] -> a
median :: [a] -> a
median [a]
xs = [a] -> [a]
forall a. Ord a => [a] -> [a]
sort [a]
xs [a] -> Int -> a
forall a. [a] -> Int -> a
!! Int
l2 where l2 :: Int
l2 = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2
pf :: String
pf :: String
pf = String
"%.3f"
header :: Int -> Int -> DistanceMeasure -> BL.ByteString
Int
n Int
m DistanceMeasure
d =
Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 1" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 2"
ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignRight
(Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
(String -> ByteString
BL.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
d)
showTriplet ::
(PrintfArg a) => Int -> Int -> [String] -> (Int, Int, a) -> BL.ByteString
showTriplet :: Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
args (Int
i, Int
j, a
d) = ByteString
i' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
j' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
d'
where
i' :: ByteString
i' = Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
i)
j' :: ByteString
j' = Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
j)
d' :: ByteString
d' = Int -> ByteString -> ByteString
alignRight (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack (String -> a -> String
forall r. PrintfType r => String -> r
printf String
pf a
d)
pairwise ::
(a -> a -> b) ->
[a] ->
[(Int, Int, b)]
pairwise :: (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise a -> a -> b
dist [a]
trs =
[ (Int
i, Int
j, a -> a -> b
dist a
x a
y)
| (Int
i : [Int]
is, a
x : [a]
xs) <- [[Int]] -> [[a]] -> [([Int], [a])]
forall a b. [a] -> [b] -> [(a, b)]
zip ([Int] -> [[Int]]
forall a. [a] -> [[a]]
tails [Int
0 ..]) ([a] -> [[a]]
forall a. [a] -> [[a]]
tails [a]
trs),
(Int
j, a
y) <- [Int] -> [a] -> [(Int, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
is [a]
xs
]
distance :: ELynx DistanceArguments ()
distance :: ELynx DistanceArguments ()
distance = do
DistanceArguments
l <- Environment DistanceArguments -> DistanceArguments
forall a. Environment a -> a
localArguments (Environment DistanceArguments -> DistanceArguments)
-> ReaderT
(Environment DistanceArguments) IO (Environment DistanceArguments)
-> ReaderT (Environment DistanceArguments) IO DistanceArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
(Environment DistanceArguments) IO (Environment DistanceArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
let nwFormat :: NewickFormat
nwFormat = DistanceArguments -> NewickFormat
argsNewickFormat DistanceArguments
l
Handle
outH <- String -> String -> ELynx DistanceArguments Handle
forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".out"
let mname :: Maybe String
mname = DistanceArguments -> Maybe String
argsMasterTreeFile DistanceArguments
l
Maybe (Tree Phylo Name)
mtree <- case Maybe String
mname of
Maybe String
Nothing -> Maybe (Tree Phylo Name)
-> ReaderT
(Environment DistanceArguments) IO (Maybe (Tree Phylo Name))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Tree Phylo Name)
forall a. Maybe a
Nothing
Just String
f -> do
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx DistanceArguments ())
-> String -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Read master tree from file: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
f String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
Tree Phylo Name
t <- IO (Tree Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Tree Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Tree Phylo Name))
-> IO (Tree Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat String
f
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Compute distances between all trees and master tree."
Maybe (Tree Phylo Name)
-> ReaderT
(Environment DistanceArguments) IO (Maybe (Tree Phylo Name))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Tree Phylo Name)
-> ReaderT
(Environment DistanceArguments) IO (Maybe (Tree Phylo Name)))
-> Maybe (Tree Phylo Name)
-> ReaderT
(Environment DistanceArguments) IO (Maybe (Tree Phylo Name))
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Maybe (Tree Phylo Name)
forall a. a -> Maybe a
Just Tree Phylo Name
t
let tfps :: [String]
tfps = DistanceArguments -> [String]
argsInFiles DistanceArguments
l
(Forest Phylo Name
trees, [String]
names) <- case [String]
tfps of
[] -> String
-> ReaderT
(Environment DistanceArguments) IO (Forest Phylo Name, [String])
forall a. HasCallStack => String -> a
error String
"No tree input files given."
[String
tf] -> do
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read trees from single file."
Forest Phylo Name
ts <- IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name))
-> IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Forest Phylo Name)
parseTrees NewickFormat
nwFormat String
tf
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx DistanceArguments ())
-> String -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Int -> String
forall a. Show a => a -> String
show (Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" trees found in file."
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Trees are indexed with integers."
(Forest Phylo Name, [String])
-> ReaderT
(Environment DistanceArguments) IO (Forest Phylo Name, [String])
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Int -> String
forall a. Show a => a -> String
show [Int
0 .. Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1])
[String]
_ -> do
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read trees from files."
Forest Phylo Name
ts <- IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name))
-> IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ (String -> IO (Tree Phylo Name))
-> [String] -> IO (Forest Phylo Name)
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat) [String]
tfps
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Trees are named according to their file names."
(Forest Phylo Name, [String])
-> ReaderT
(Environment DistanceArguments) IO (Forest Phylo Name, [String])
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, [String]
tfps)
Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Forest Phylo Name -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Forest Phylo Name
trees) (String -> ELynx DistanceArguments ()
forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when
(Maybe (Tree Phylo Name) -> Bool
forall a. Maybe a -> Bool
isNothing Maybe (Tree Phylo Name)
mtree Bool -> Bool -> Bool
&& Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
trees Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1)
(String -> ELynx DistanceArguments ()
forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"The trees are:"
ByteString -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logDebugB (ByteString -> ELynx DistanceArguments ())
-> ByteString -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ (Tree Phylo Name -> ByteString)
-> Forest Phylo Name -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Tree Phylo Name -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick Forest Phylo Name
trees
let dist :: DistanceMeasure
dist = DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l
case DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l of
DistanceMeasure
Symmetric -> String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use symmetric (Robinson-Foulds) distance."
IncompatibleSplit Support
val -> do
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use incompatible split distance."
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx DistanceArguments ())
-> String -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
String
"Collapse nodes with support less than "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Support -> String
forall a. Show a => a -> String
show Support
val
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"."
DistanceMeasure
BranchScore -> String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use branch score distance."
let distanceMeasure' ::
Tree Phylo Name ->
Tree Phylo Name ->
Double
distanceMeasure' :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1 Tree Phylo Name
t2 = (String -> Double)
-> (Double -> Double) -> Either String Double -> Double
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Double
forall a. HasCallStack => String -> a
error Double -> Double
forall a. a -> a
id (Either String Double -> Double) -> Either String Double -> Double
forall a b. (a -> b) -> a -> b
$ case DistanceMeasure
dist of
DistanceMeasure
Symmetric -> (Int -> Double) -> Either String Int -> Either String Double
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Either String Int -> Either String Double)
-> Either String Int -> Either String Double
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Tree Phylo Name -> Either String Int
forall a e1 e2.
Ord a =>
Tree e1 a -> Tree e2 a -> Either String Int
symmetric Tree Phylo Name
t1 Tree Phylo Name
t2
IncompatibleSplit Support
val ->
(Int -> Double) -> Either String Int -> Either String Double
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Either String Int -> Either String Double)
-> Either String Int -> Either String Double
forall a b. (a -> b) -> a -> b
$
Tree Support Name -> Tree Support Name -> Either String Int
forall a e1 e2.
(Show a, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Int
incompatibleSplits
(Support -> Tree Support Name -> Tree Support Name
forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Support Name -> Tree Support Name
forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Support Name)
-> (Tree Support Name -> Tree Support Name)
-> Either String (Tree Support Name)
-> Tree Support Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Support Name
forall a. HasCallStack => String -> a
error Tree Support Name -> Tree Support Name
forall a. a -> a
id (Either String (Tree Support Name) -> Tree Support Name)
-> Either String (Tree Support Name) -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Support Name)
forall e a.
HasMaybeSupport e =>
Tree e a -> Either String (Tree Support a)
toSupportTree Tree Phylo Name
t1)
(Support -> Tree Support Name -> Tree Support Name
forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Support Name -> Tree Support Name
forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Support Name)
-> (Tree Support Name -> Tree Support Name)
-> Either String (Tree Support Name)
-> Tree Support Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Support Name
forall a. HasCallStack => String -> a
error Tree Support Name -> Tree Support Name
forall a. a -> a
id (Either String (Tree Support Name) -> Tree Support Name)
-> Either String (Tree Support Name) -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Support Name)
forall e a.
HasMaybeSupport e =>
Tree e a -> Either String (Tree Support a)
toSupportTree Tree Phylo Name
t2)
DistanceMeasure
BranchScore ->
Tree Length Name -> Tree Length Name -> Either String Double
forall e1 e2 a.
(HasLength e1, HasLength e2, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Double
branchScore
(Tree Length Name -> Tree Length Name
forall a. Tree Length a -> Tree Length a
normalizeF (Tree Length Name -> Tree Length Name)
-> Tree Length Name -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
t1)
(Tree Length Name -> Tree Length Name
forall a. Tree Length a -> Tree Length a
normalizeF (Tree Length Name -> Tree Length Name)
-> Tree Length Name -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
t2)
where
normalizeF :: Tree Length a -> Tree Length a
normalizeF = if DistanceArguments -> Bool
argsNormalize DistanceArguments
l then Tree Length a -> Tree Length a
forall e a. HasLength e => Tree e a -> Tree e a
normalizeBranchLengths else Tree Length a -> Tree Length a
forall a. a -> a
id
Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsIntersect DistanceArguments
l) (ELynx DistanceArguments () -> ELynx DistanceArguments ())
-> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Intersect trees before calculation of distances."
let distanceMeasure :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure =
if DistanceArguments -> Bool
argsIntersect DistanceArguments
l
then
( \Tree Phylo Name
t1 Tree Phylo Name
t2 ->
let [Tree Phylo Name
t1', Tree Phylo Name
t2'] = (String -> Forest Phylo Name)
-> (Forest Phylo Name -> Forest Phylo Name)
-> Either String (Forest Phylo Name)
-> Forest Phylo Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Forest Phylo Name
forall a. HasCallStack => String -> a
error Forest Phylo Name -> Forest Phylo Name
forall a. a -> a
id (Either String (Forest Phylo Name) -> Forest Phylo Name)
-> Either String (Forest Phylo Name) -> Forest Phylo Name
forall a b. (a -> b) -> a -> b
$ Forest Phylo Name -> Either String (Forest Phylo Name)
forall e a.
(Semigroup e, Eq e, Ord a) =>
Forest e a -> Either String (Forest e a)
intersect [Tree Phylo Name
t1, Tree Phylo Name
t2]
in Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1' Tree Phylo Name
t2'
)
else Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure'
Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsNormalize DistanceArguments
l) (ELynx DistanceArguments () -> ELynx DistanceArguments ())
-> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Normalize trees before calculation of distances."
let dsTriplets :: [(Int, Int, Double)]
dsTriplets = case Maybe (Tree Phylo Name)
mtree of
Maybe (Tree Phylo Name)
Nothing -> (Tree Phylo Name -> Tree Phylo Name -> Double)
-> Forest Phylo Name -> [(Int, Int, Double)]
forall a b. (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Forest Phylo Name
trees
Just Tree Phylo Name
masterTree -> [(Int
0, Int
i, Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Tree Phylo Name
masterTree Tree Phylo Name
t') | (Int
i, Tree Phylo Name
t') <- [Int] -> Forest Phylo Name -> [(Int, Tree Phylo Name)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1 ..] Forest Phylo Name
trees]
ds :: [Double]
ds = ((Int, Int, Double) -> Double) -> [(Int, Int, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
_, Int
_, Double
x) -> Double
x) [(Int, Int, Double)]
dsTriplets
dsVec :: Vector Double
dsVec = [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
V.fromList [Double]
ds
IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
Handle -> String -> IO ()
hPutStrLn Handle
outH (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
String
"Summary statistics of "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
dist
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" Distance:"
IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Mean: "
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
(String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
dsVec))
IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Median: "
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
(String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf ([Double] -> Double
forall a. Ord a => [a] -> a
median [Double]
ds))
IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Variance: "
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
(String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
variance Vector Double
dsVec))
Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless
(DistanceArguments -> Bool
argsSummaryStatistics DistanceArguments
l)
( do
let n :: Int
n = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Int
6 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: (String -> Int) -> [String] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
names
m :: Int
m = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
dist
IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> String -> IO ()
hPutStrLn Handle
outH String
""
IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStrLn Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ Int -> Int -> DistanceMeasure -> ByteString
header Int
n Int
m DistanceMeasure
dist
case Maybe String
mname of
Maybe String
Nothing ->
IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
[ByteString] -> ByteString
BL.unlines
(((Int, Int, Double) -> ByteString)
-> [(Int, Int, Double)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> [String] -> (Int, Int, Double) -> ByteString
forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
names) [(Int, Int, Double)]
dsTriplets)
Just String
mn ->
IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
[ByteString] -> ByteString
BL.unlines
(((Int, Int, Double) -> ByteString)
-> [(Int, Int, Double)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> [String] -> (Int, Int, Double) -> ByteString
forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m (String
mn String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
names)) [(Int, Int, Double)]
dsTriplets)
)
IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
outH