{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}

-- |
--   Description :  Simulate reconstructed trees
--   Copyright   :  (c) Dominik Schrempf 2021
--   License     :  GPL-3.0-or-later
--
--   Maintainer  :  dominik.schrempf@gmail.com
--   Stability   :  unstable
--   Portability :  portable
--
-- Creation date: Tue Feb 27 17:27:16 2018.
--
-- See Gernhard, T. (2008). The conditioned reconstructed process. Journal of
-- Theoretical Biology, 253(4), 769–778. http://doi.org/10.1016/j.jtbi.2008.04.005.
--
-- Mon Feb 4 14:26:11 CET 2019: Adding sampling probability rho. See Article
-- (Stadler2009) Stadler, T. On incomplete sampling under birth–death models and
-- connections to the sampling-based coalescent Journal of Theoretical Biology,
-- Elsevier BV, 2009, 261, 58-66
module TLynx.Simulate.Simulate
  ( simulate,
  )
where

import Control.Concurrent (getNumCapabilities)
import Control.Concurrent.Async
  ( mapConcurrently,
  )
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader hiding (local)
import Control.Parallel.Strategies
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Foldable
import Data.Maybe
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import ELynx.Tools
import ELynx.Tree
import qualified ELynx.Tree.Simulate.Coalescent as CS
import qualified ELynx.Tree.Simulate.PointProcess as PP
import System.Random.MWC
import TLynx.Simulate.Options

-- | Simulate phylogenetic trees using birth and death process.
simulate :: ELynx SimulateArguments ()
simulate :: ELynx SimulateArguments ()
simulate = do
  l :: SimulateArguments
l@(SimulateArguments Int
nTrees Int
nLeaves Process
pr Maybe Double
subS Bool
sumS (Fixed Vector Word32
s)) <- Environment SimulateArguments -> SimulateArguments
forall a. Environment a -> a
localArguments (Environment SimulateArguments -> SimulateArguments)
-> ReaderT
     (Environment SimulateArguments) IO (Environment SimulateArguments)
-> ReaderT (Environment SimulateArguments) IO SimulateArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment SimulateArguments) IO (Environment SimulateArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  Int
c <- IO Int -> ReaderT (Environment SimulateArguments) IO Int
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO Int
getNumCapabilities
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection String
"Arguments"
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ SimulateArguments -> String
reportSimulateArguments SimulateArguments
l
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection String
"Simulation"
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Number of used cores: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
c
  [Gen RealWorld]
gs <- IO [Gen RealWorld]
-> ReaderT (Environment SimulateArguments) IO [Gen RealWorld]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [Gen RealWorld]
 -> ReaderT (Environment SimulateArguments) IO [Gen RealWorld])
-> IO [Gen RealWorld]
-> ReaderT (Environment SimulateArguments) IO [Gen RealWorld]
forall a b. (a -> b) -> a -> b
$ Vector Word32 -> IO (Gen (PrimState IO))
forall (m :: * -> *) (v :: * -> *).
(PrimMonad m, Vector v Word32) =>
v Word32 -> m (Gen (PrimState m))
initialize Vector Word32
s IO (Gen RealWorld)
-> (Gen RealWorld -> IO [Gen RealWorld]) -> IO [Gen RealWorld]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Gen RealWorld
gen -> Int -> Gen (PrimState IO) -> IO [Gen (PrimState IO)]
forall (m :: * -> *).
PrimMonad m =>
Int -> Gen (PrimState m) -> m [Gen (PrimState m)]
splitGen Int
c Gen RealWorld
Gen (PrimState IO)
gen
  let chunks :: [Int]
chunks = Int -> Int -> [Int]
getChunks Int
c Int
nTrees
  Forest Length Int
trs <- case Process
pr of
    (BirthDeath Double
lambda Double
mu Maybe Double
mRho TimeSpec
timeSpec) -> do
      let rho :: Double
rho = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
1.0 Maybe Double
mRho
      case Maybe Double
subS of
        Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
lambda Double
mu Double
rho TimeSpec
timeSpec [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs
        Just Double
p ->
          Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently
            Int
nLeaves
            Double
lambda
            Double
mu
            Double
rho
            Double
p
            TimeSpec
timeSpec
            [Int]
chunks
            [Gen RealWorld]
[Gen (PrimState IO)]
gs
    Process
Coalescent -> case Maybe Double
subS of
      Maybe Double
Nothing -> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> IO (Forest Length Int)
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> [Int] -> [Gen (PrimState IO)] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nLeaves [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs
      Just Double
p -> Int
-> Double
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
p [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs
  let ls :: [ByteString]
ls =
        if Bool
sumS
          then Strategy ByteString
-> (Tree Length Int -> ByteString)
-> Forest Length Int
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar (NChildSumStat -> ByteString
formatNChildSumStat (NChildSumStat -> ByteString)
-> (Tree Length Int -> NChildSumStat)
-> Tree Length Int
-> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Tree Length Int -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat) Forest Length Int
trs
          else Strategy ByteString
-> (Tree Phylo Int -> ByteString)
-> [Tree Phylo Int]
-> [ByteString]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy ByteString
forall a. Strategy a
rpar Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick ([Tree Phylo Int] -> [ByteString])
-> [Tree Phylo Int] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Phylo Int)
-> Forest Length Int -> [Tree Phylo Int]
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Forest Length Int
trs
  let res :: ByteString
res = [ByteString] -> ByteString
BL.unlines [ByteString]
ls
  String -> ByteString -> String -> ELynx SimulateArguments ()
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"simulated trees" ByteString
res String
".tree"

bdSimulateNTreesConcurrently ::
  Int ->
  Double ->
  Double ->
  Double ->
  PP.TimeSpec ->
  [Int] ->
  [GenIO] ->
  IO (Forest Length Int)
bdSimulateNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> IO (Forest Length Int)
bdSimulateNTreesConcurrently Int
nLeaves Double
l Double
m Double
r TimeSpec
timeSpec [Int]
chunks [Gen (PrimState IO)]
gs = do
  let l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
      m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
  [Forest Length Int]
trss <-
    ((Int, Gen RealWorld) -> IO (Forest Length Int))
-> [(Int, Gen RealWorld)] -> IO [Forest Length Int]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
      (\(Int
n, Gen RealWorld
g) -> Int
-> Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState IO)
-> IO (Forest Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int
-> Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState m)
-> m (Forest Length Int)
PP.simulateNReconstructedTrees Int
n Int
nLeaves TimeSpec
timeSpec Double
l' Double
m' Gen RealWorld
Gen (PrimState IO)
g)
      ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss

coalSimulateNTreesConcurrently ::
  Int ->
  [Int] ->
  [GenIO] ->
  IO (Forest Length Int)
coalSimulateNTreesConcurrently :: Int -> [Int] -> [Gen (PrimState IO)] -> IO (Forest Length Int)
coalSimulateNTreesConcurrently Int
nL [Int]
chunks [Gen (PrimState IO)]
gs = do
  [Forest Length Int]
trss <-
    ((Int, Gen RealWorld) -> IO (Forest Length Int))
-> [(Int, Gen RealWorld)] -> IO [Forest Length Int]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
      (\(Int
n, Gen RealWorld
g) -> Int -> IO (Tree Length Int) -> IO (Forest Length Int)
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
n (IO (Tree Length Int) -> IO (Forest Length Int))
-> IO (Tree Length Int) -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> Gen (PrimState IO) -> IO (Tree Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int -> Gen (PrimState m) -> m (Tree Length Int)
CS.simulate Int
nL Gen RealWorld
Gen (PrimState IO)
g)
      ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  Forest Length Int -> IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int -> IO (Forest Length Int))
-> Forest Length Int -> IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ [Forest Length Int] -> Forest Length Int
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [Forest Length Int]
trss

bdSimulateAndSubSampleNTreesConcurrently ::
  Int ->
  Double ->
  Double ->
  Double ->
  Double ->
  PP.TimeSpec ->
  [Int] ->
  [GenIO] ->
  ELynx SimulateArguments (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> Double
-> Double
-> Double
-> TimeSpec
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
bdSimulateAndSubSampleNTreesConcurrently Int
nLeaves Double
l Double
m Double
r Double
p TimeSpec
timeSpec [Int]
chunks [Gen (PrimState IO)]
gs = do
  let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nLeaves Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
      l' :: Double
l' = Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
r
      m' :: Double
m' = Double
m Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
l Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1.0 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
r)
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String
"Simulate one big tree with "
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nLeavesBigTree
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState IO)
-> IO (Tree Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int
-> TimeSpec
-> Double
-> Double
-> Gen (PrimState m)
-> m (Tree Length Int)
PP.simulateReconstructedTree Int
nLeavesBigTree TimeSpec
timeSpec Double
l' Double
m' ([Gen RealWorld] -> Gen RealWorld
forall a. [a] -> a
head [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  -- Log the base tree.
  ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String
"Sub sample "
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" trees with "
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nLeaves
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
  [[Maybe (Tree Length Int)]]
trss <-
    IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
 -> ReaderT
      (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
      ((Int, Gen RealWorld) -> IO [Maybe (Tree Length Int)])
-> [(Int, Gen RealWorld)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
        (\(Int
nSamples, Gen RealWorld
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> Gen (PrimState IO)
-> IO [Maybe (Tree Length Int)]
forall a e.
Ord a =>
Int
-> Seq a
-> Int
-> Tree e a
-> Gen (PrimState IO)
-> IO [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nLeaves Tree Length Int
tr Gen RealWorld
Gen (PrimState IO)
g)
        ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
  Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs

coalSimulateAndSubSampleNTreesConcurrently ::
  Int ->
  Double ->
  [Int] ->
  [GenIO] ->
  ELynx SimulateArguments (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently :: Int
-> Double
-> [Int]
-> [Gen (PrimState IO)]
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
coalSimulateAndSubSampleNTreesConcurrently Int
nL Double
p [Int]
chunks [Gen (PrimState IO)]
gs = do
  let nLeavesBigTree :: Int
nLeavesBigTree = (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
round (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nL Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
p) :: Int
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String
"Simulate one big tree with "
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nLeavesBigTree
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  Tree Length Int
tr <- IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Length Int)
 -> ReaderT (Environment SimulateArguments) IO (Tree Length Int))
-> IO (Tree Length Int)
-> ReaderT (Environment SimulateArguments) IO (Tree Length Int)
forall a b. (a -> b) -> a -> b
$ Int -> Gen (PrimState IO) -> IO (Tree Length Int)
forall (m :: * -> *).
PrimMonad m =>
Int -> Gen (PrimState m) -> m (Tree Length Int)
CS.simulate Int
nLeavesBigTree ([Gen RealWorld] -> Gen RealWorld
forall a. [a] -> a
head [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  -- Log the base tree.
  ByteString -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logInfoB (ByteString -> ELynx SimulateArguments ())
-> ByteString -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$ Tree Phylo Int -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick (Tree Phylo Int -> ByteString) -> Tree Phylo Int -> ByteString
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> Tree Phylo Int
forall e a. HasMaybeLength e => Tree e a -> Tree Phylo a
lengthToPhyloTree Tree Length Int
tr
  String -> ELynx SimulateArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoNewSection (String -> ELynx SimulateArguments ())
-> String -> ELynx SimulateArguments ()
forall a b. (a -> b) -> a -> b
$
    String
"Sub sample "
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
chunks)
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" trees with "
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
nL
      String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" leaves."
  let lvs :: Seq Int
lvs = [Int] -> Seq Int
forall a. [a] -> Seq a
Seq.fromList ([Int] -> Seq Int) -> [Int] -> Seq Int
forall a b. (a -> b) -> a -> b
$ Tree Length Int -> [Int]
forall e a. Tree e a -> [a]
leaves Tree Length Int
tr
  [[Maybe (Tree Length Int)]]
trss <-
    IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO [[Maybe (Tree Length Int)]]
 -> ReaderT
      (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]])
-> IO [[Maybe (Tree Length Int)]]
-> ReaderT
     (Environment SimulateArguments) IO [[Maybe (Tree Length Int)]]
forall a b. (a -> b) -> a -> b
$
      ((Int, Gen RealWorld) -> IO [Maybe (Tree Length Int)])
-> [(Int, Gen RealWorld)] -> IO [[Maybe (Tree Length Int)]]
forall (t :: * -> *) a b.
Traversable t =>
(a -> IO b) -> t a -> IO (t b)
mapConcurrently
        (\(Int
nSamples, Gen RealWorld
g) -> Int
-> Seq Int
-> Int
-> Tree Length Int
-> Gen (PrimState IO)
-> IO [Maybe (Tree Length Int)]
forall a e.
Ord a =>
Int
-> Seq a
-> Int
-> Tree e a
-> Gen (PrimState IO)
-> IO [Maybe (Tree e a)]
nSubSamples Int
nSamples Seq Int
lvs Int
nL Tree Length Int
tr Gen RealWorld
Gen (PrimState IO)
g)
        ([Int] -> [Gen RealWorld] -> [(Int, Gen RealWorld)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
chunks [Gen RealWorld]
[Gen (PrimState IO)]
gs)
  let trs :: Forest Length Int
trs = [Maybe (Tree Length Int)] -> Forest Length Int
forall a. [Maybe a] -> [a]
catMaybes ([Maybe (Tree Length Int)] -> Forest Length Int)
-> [Maybe (Tree Length Int)] -> Forest Length Int
forall a b. (a -> b) -> a -> b
$ [[Maybe (Tree Length Int)]] -> [Maybe (Tree Length Int)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Maybe (Tree Length Int)]]
trss
  Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Length Int
 -> ReaderT (Environment SimulateArguments) IO (Forest Length Int))
-> Forest Length Int
-> ReaderT (Environment SimulateArguments) IO (Forest Length Int)
forall a b. (a -> b) -> a -> b
$ (Tree Length Int -> Tree Length Int)
-> Forest Length Int -> Forest Length Int
forall a b. (a -> b) -> [a] -> [b]
map Tree Length Int -> Tree Length Int
forall e a. Semigroup e => Tree e a -> Tree e a
prune Forest Length Int
trs

-- Extract a random subtree with @N@ leaves of a tree with @M@ leaves, where
-- @M>N@ (otherwise error). The complete list of leaves (names are assumed to be
-- unique) has to be provided as a 'Seq.Seq', and a 'Seq.Set', so that fast
-- sub-sampling as well as lookup are fast and so that these data structures do
-- not have to be recomputed when many sub-samples are requested.
nSubSamples ::
  Ord a =>
  Int ->
  Seq.Seq a ->
  Int ->
  Tree e a ->
  GenIO ->
  IO [Maybe (Tree e a)]
nSubSamples :: Int
-> Seq a
-> Int
-> Tree e a
-> Gen (PrimState IO)
-> IO [Maybe (Tree e a)]
nSubSamples Int
m Seq a
lvs Int
n Tree e a
tree Gen (PrimState IO)
g
  | Seq a -> Int
forall a. Seq a -> Int
Seq.length Seq a
lvs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n =
    String -> IO [Maybe (Tree e a)]
forall a. HasCallStack => String -> a
error
      String
"Given list of leaves is shorter than requested number of leaves."
  | Bool
otherwise = do
    [[a]]
lss <- [a] -> Int -> Int -> Gen (PrimState IO) -> IO [[a]]
forall (m :: * -> *) a.
PrimMonad m =>
[a] -> Int -> Int -> Gen (PrimState m) -> m [[a]]
grabble (Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq a
lvs) Int
m Int
n Gen (PrimState IO)
g
    let lsSets :: [Set a]
lsSets = ([a] -> Set a) -> [[a]] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map [a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [[a]]
lss
    [Maybe (Tree e a)] -> IO [Maybe (Tree e a)]
forall (m :: * -> *) a. Monad m => a -> m a
return [(a -> Bool) -> Tree e a -> Maybe (Tree e a)
forall a e. (a -> Bool) -> Tree e a -> Maybe (Tree e a)
dropLeavesWith (a -> Set a -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.notMember` Set a
ls) Tree e a
tree | Set a
ls <- [Set a]
lsSets]

-- Pair of branch length with number of extant children.
type BrLnNChildren = (Length, Int)

-- Possible summary statistic of phylogenetic trees. A list of tuples
-- (Length, NumberOfExtantChildrenBelowThisBranch).
type NChildSumStat = [BrLnNChildren]

-- Format the summary statistics in the following form:
--
-- @
--   nLeaves1 branchLength1
--   nLeaves2 branchLength2
--   ....
-- @
formatNChildSumStat :: NChildSumStat -> BL.ByteString
formatNChildSumStat :: NChildSumStat -> ByteString
formatNChildSumStat NChildSumStat
s =
  Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString)
-> ([Builder] -> Builder) -> [Builder] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> ByteString) -> [Builder] -> ByteString
forall a b. (a -> b) -> a -> b
$ (BrLnNChildren -> Builder) -> NChildSumStat -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map BrLnNChildren -> Builder
formatNChildSumStatLine NChildSumStat
s

formatNChildSumStatLine :: BrLnNChildren -> BB.Builder
formatNChildSumStatLine :: BrLnNChildren -> Builder
formatNChildSumStatLine (Length
l, Int
n) =
  Int -> Builder
BB.intDec Int
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
' ' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Double -> Builder
BB.doubleDec (Length -> Double
fromLength Length
l) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n'

-- Compute NChilSumStat for a phylogenetic tree.
toNChildSumStat :: HasLength e => Tree e a -> NChildSumStat
toNChildSumStat :: Tree e a -> NChildSumStat
toNChildSumStat (Node e
br a
_ []) = [(e -> Length
forall e. HasLength e => e -> Length
getLength e
br, Int
1)]
toNChildSumStat (Node e
br a
_ [Tree e a]
ts) = (e -> Length
forall e. HasLength e => e -> Length
getLength e
br, Int
sumCh) BrLnNChildren -> NChildSumStat -> NChildSumStat
forall a. a -> [a] -> [a]
: [NChildSumStat] -> NChildSumStat
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [NChildSumStat]
nChSS
  where
    nChSS :: [NChildSumStat]
nChSS = (Tree e a -> NChildSumStat) -> [Tree e a] -> [NChildSumStat]
forall a b. (a -> b) -> [a] -> [b]
map Tree e a -> NChildSumStat
forall e a. HasLength e => Tree e a -> NChildSumStat
toNChildSumStat [Tree e a]
ts
    sumCh :: Int
sumCh = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (NChildSumStat -> Int) -> [NChildSumStat] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (BrLnNChildren -> Int
forall a b. (a, b) -> b
snd (BrLnNChildren -> Int)
-> (NChildSumStat -> BrLnNChildren) -> NChildSumStat -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NChildSumStat -> BrLnNChildren
forall a. [a] -> a
head) [NChildSumStat]
nChSS