{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  TLynx.Distance.Options
-- Description :  Options of tree-dist
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Thu Aug 29 13:02:22 2019.
module TLynx.Distance.Options
  ( DistanceArguments (..),
    DistanceMeasure (..),
    distanceArguments,
    distanceFooter,
  )
where

import qualified Data.Attoparsec.ByteString.Char8 as AC
import qualified Data.ByteString.Char8 as BS
import ELynx.Tools
import ELynx.Tree (Support (..), toSupportUnsafe)
import Options.Applicative
import TLynx.Parsers
import Text.Printf

-- | Supported distance measures.
data DistanceMeasure
  = -- | Symmetric distance.
    Symmetric
  | -- | Incompatible split distance; collapse nodes
    -- with branch support below given value.
    IncompatibleSplit Support
  | -- | Branch score distance.
    BranchScore
  deriving (DistanceMeasure -> DistanceMeasure -> Bool
(DistanceMeasure -> DistanceMeasure -> Bool)
-> (DistanceMeasure -> DistanceMeasure -> Bool)
-> Eq DistanceMeasure
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DistanceMeasure -> DistanceMeasure -> Bool
$c/= :: DistanceMeasure -> DistanceMeasure -> Bool
== :: DistanceMeasure -> DistanceMeasure -> Bool
$c== :: DistanceMeasure -> DistanceMeasure -> Bool
Eq, (forall x. DistanceMeasure -> Rep DistanceMeasure x)
-> (forall x. Rep DistanceMeasure x -> DistanceMeasure)
-> Generic DistanceMeasure
forall x. Rep DistanceMeasure x -> DistanceMeasure
forall x. DistanceMeasure -> Rep DistanceMeasure x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep DistanceMeasure x -> DistanceMeasure
$cfrom :: forall x. DistanceMeasure -> Rep DistanceMeasure x
Generic)

instance FromJSON DistanceMeasure

instance ToJSON DistanceMeasure

instance Show DistanceMeasure where
  show :: DistanceMeasure -> String
show DistanceMeasure
Symmetric = String
"Symmetric"
  show (IncompatibleSplit Support
c) = String
"Incompatible Split (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> Double -> String
forall r. PrintfType r => String -> r
printf String
"%.2f" (Support -> Double
fromSupport Support
c) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"
  show DistanceMeasure
BranchScore = String
"Branch Score"

-- | Arguments needed to compute distance measures.
data DistanceArguments = DistanceArguments
  { DistanceArguments -> DistanceMeasure
argsDistance :: DistanceMeasure,
    DistanceArguments -> Bool
argsNormalize :: Bool,
    DistanceArguments -> Bool
argsIntersect :: Bool,
    DistanceArguments -> Bool
argsSummaryStatistics :: Bool,
    DistanceArguments -> Maybe String
argsMasterTreeFile :: Maybe FilePath,
    DistanceArguments -> NewickFormat
argsNewickFormat :: NewickFormat,
    DistanceArguments -> [String]
argsInFiles :: [FilePath]
  }
  deriving (DistanceArguments -> DistanceArguments -> Bool
(DistanceArguments -> DistanceArguments -> Bool)
-> (DistanceArguments -> DistanceArguments -> Bool)
-> Eq DistanceArguments
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DistanceArguments -> DistanceArguments -> Bool
$c/= :: DistanceArguments -> DistanceArguments -> Bool
== :: DistanceArguments -> DistanceArguments -> Bool
$c== :: DistanceArguments -> DistanceArguments -> Bool
Eq, Int -> DistanceArguments -> ShowS
[DistanceArguments] -> ShowS
DistanceArguments -> String
(Int -> DistanceArguments -> ShowS)
-> (DistanceArguments -> String)
-> ([DistanceArguments] -> ShowS)
-> Show DistanceArguments
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DistanceArguments] -> ShowS
$cshowList :: [DistanceArguments] -> ShowS
show :: DistanceArguments -> String
$cshow :: DistanceArguments -> String
showsPrec :: Int -> DistanceArguments -> ShowS
$cshowsPrec :: Int -> DistanceArguments -> ShowS
Show, (forall x. DistanceArguments -> Rep DistanceArguments x)
-> (forall x. Rep DistanceArguments x -> DistanceArguments)
-> Generic DistanceArguments
forall x. Rep DistanceArguments x -> DistanceArguments
forall x. DistanceArguments -> Rep DistanceArguments x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep DistanceArguments x -> DistanceArguments
$cfrom :: forall x. DistanceArguments -> Rep DistanceArguments x
Generic)

instance Reproducible DistanceArguments where
  inFiles :: DistanceArguments -> [String]
inFiles DistanceArguments
a = case DistanceArguments -> Maybe String
argsMasterTreeFile DistanceArguments
a of
    Maybe String
Nothing -> DistanceArguments -> [String]
argsInFiles DistanceArguments
a
    Just String
f -> String
f String -> [String] -> [String]
forall a. a -> [a] -> [a]
: DistanceArguments -> [String]
argsInFiles DistanceArguments
a
  outSuffixes :: DistanceArguments -> [String]
outSuffixes DistanceArguments
_ = [String
".out"]
  getSeed :: DistanceArguments -> Maybe SeedOpt
getSeed DistanceArguments
_ = Maybe SeedOpt
forall a. Maybe a
Nothing
  setSeed :: DistanceArguments -> SeedOpt -> DistanceArguments
setSeed = DistanceArguments -> SeedOpt -> DistanceArguments
forall a b. a -> b -> a
const
  parser :: Parser DistanceArguments
parser = Parser DistanceArguments
distanceArguments
  cmdName :: String
cmdName = String
"distance"
  cmdDsc :: [String]
cmdDsc = [String
"Compute distances between many phylogenetic trees."]
  cmdFtr :: [String]
cmdFtr = [String]
distanceFooter

instance FromJSON DistanceArguments

instance ToJSON DistanceArguments

-- | COmmand line parser.
distanceArguments :: Parser DistanceArguments
distanceArguments :: Parser DistanceArguments
distanceArguments =
  DistanceMeasure
-> Bool
-> Bool
-> Bool
-> Maybe String
-> NewickFormat
-> [String]
-> DistanceArguments
DistanceArguments
    (DistanceMeasure
 -> Bool
 -> Bool
 -> Bool
 -> Maybe String
 -> NewickFormat
 -> [String]
 -> DistanceArguments)
-> Parser DistanceMeasure
-> Parser
     (Bool
      -> Bool
      -> Bool
      -> Maybe String
      -> NewickFormat
      -> [String]
      -> DistanceArguments)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser DistanceMeasure
distanceOpt
    Parser
  (Bool
   -> Bool
   -> Bool
   -> Maybe String
   -> NewickFormat
   -> [String]
   -> DistanceArguments)
-> Parser Bool
-> Parser
     (Bool
      -> Bool
      -> Maybe String
      -> NewickFormat
      -> [String]
      -> DistanceArguments)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Bool
normalizeSwitch
    Parser
  (Bool
   -> Bool
   -> Maybe String
   -> NewickFormat
   -> [String]
   -> DistanceArguments)
-> Parser Bool
-> Parser
     (Bool
      -> Maybe String -> NewickFormat -> [String] -> DistanceArguments)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Bool
intersectSwitch
    Parser
  (Bool
   -> Maybe String -> NewickFormat -> [String] -> DistanceArguments)
-> Parser Bool
-> Parser
     (Maybe String -> NewickFormat -> [String] -> DistanceArguments)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Bool
summaryStatisticsSwitch
    Parser
  (Maybe String -> NewickFormat -> [String] -> DistanceArguments)
-> Parser (Maybe String)
-> Parser (NewickFormat -> [String] -> DistanceArguments)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser (Maybe String)
masterTreeFile
    Parser (NewickFormat -> [String] -> DistanceArguments)
-> Parser NewickFormat -> Parser ([String] -> DistanceArguments)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser NewickFormat
newickFormat
    Parser ([String] -> DistanceArguments)
-> Parser [String] -> Parser DistanceArguments
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser String -> Parser [String]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser String
inFilesArg

masterTreeFile :: Parser (Maybe FilePath)
masterTreeFile :: Parser (Maybe String)
masterTreeFile =
  Parser String -> Parser (Maybe String)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Parser String -> Parser (Maybe String))
-> Parser String -> Parser (Maybe String)
forall a b. (a -> b) -> a -> b
$
    Mod OptionFields String -> Parser String
forall s. IsString s => Mod OptionFields s -> Parser s
strOption (Mod OptionFields String -> Parser String)
-> Mod OptionFields String -> Parser String
forall a b. (a -> b) -> a -> b
$
      String -> Mod OptionFields String
forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"master-tree-file"
        Mod OptionFields String
-> Mod OptionFields String -> Mod OptionFields String
forall a. Semigroup a => a -> a -> a
<> Char -> Mod OptionFields String
forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'm'
        Mod OptionFields String
-> Mod OptionFields String -> Mod OptionFields String
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields String
forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"MASTER-TREE-File"
        Mod OptionFields String
-> Mod OptionFields String -> Mod OptionFields String
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields String
forall (f :: * -> *) a. String -> Mod f a
help String
"Compare all trees to the tree in the master tree file."

inFilesArg :: Parser FilePath
inFilesArg :: Parser String
inFilesArg =
  Mod ArgumentFields String -> Parser String
forall s. IsString s => Mod ArgumentFields s -> Parser s
strArgument (Mod ArgumentFields String -> Parser String)
-> Mod ArgumentFields String -> Parser String
forall a b. (a -> b) -> a -> b
$
    String -> Mod ArgumentFields String
forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"INPUT-FILES"
      Mod ArgumentFields String
-> Mod ArgumentFields String -> Mod ArgumentFields String
forall a. Semigroup a => a -> a -> a
<> String -> Mod ArgumentFields String
forall (f :: * -> *) a. String -> Mod f a
help
        String
"Read tree(s) from INPUT-FILES; if more files are given, one tree is expected per file"

symmetric :: AC.Parser DistanceMeasure
symmetric :: Parser DistanceMeasure
symmetric = do
  ByteString
_ <- ByteString -> Parser ByteString
AC.string ByteString
"symmetric"
  ()
_ <- Parser ByteString ()
forall t. Chunk t => Parser t ()
AC.endOfInput
  DistanceMeasure -> Parser DistanceMeasure
forall (f :: * -> *) a. Applicative f => a -> f a
pure DistanceMeasure
Symmetric

incompatibleSplit :: AC.Parser DistanceMeasure
incompatibleSplit :: Parser DistanceMeasure
incompatibleSplit = do
  ByteString
_ <- ByteString -> Parser ByteString
AC.string ByteString
"incompatible-split"
  Char
_ <- Char -> Parser Char
AC.char Char
'['
  Double
f <- Parser Double
AC.double
  Char
_ <- Char -> Parser Char
AC.char Char
']'
  ()
_ <- Parser ByteString ()
forall t. Chunk t => Parser t ()
AC.endOfInput
  if (Double
0 Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
f) Bool -> Bool -> Bool
&& (Double
f Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
1)
    then DistanceMeasure -> Parser DistanceMeasure
forall (f :: * -> *) a. Applicative f => a -> f a
pure (DistanceMeasure -> Parser DistanceMeasure)
-> DistanceMeasure -> Parser DistanceMeasure
forall a b. (a -> b) -> a -> b
$ Support -> DistanceMeasure
IncompatibleSplit (Support -> DistanceMeasure) -> Support -> DistanceMeasure
forall a b. (a -> b) -> a -> b
$ Double -> Support
toSupportUnsafe Double
f
    else String -> Parser DistanceMeasure
forall a. HasCallStack => String -> a
error String
"Branch support has to be in [0, 1]."

branchScore :: AC.Parser DistanceMeasure
branchScore :: Parser DistanceMeasure
branchScore = do
  ByteString
_ <- ByteString -> Parser ByteString
AC.string ByteString
"branch-score"
  ()
_ <- Parser ByteString ()
forall t. Chunk t => Parser t ()
AC.endOfInput
  DistanceMeasure -> Parser DistanceMeasure
forall (f :: * -> *) a. Applicative f => a -> f a
pure DistanceMeasure
BranchScore

-- Try first the normalized one, since the normal branch score
-- parser also succeeds in this case.
distanceParser :: AC.Parser DistanceMeasure
distanceParser :: Parser DistanceMeasure
distanceParser = Parser DistanceMeasure
symmetric Parser DistanceMeasure
-> Parser DistanceMeasure -> Parser DistanceMeasure
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser DistanceMeasure
incompatibleSplit Parser DistanceMeasure
-> Parser DistanceMeasure -> Parser DistanceMeasure
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser DistanceMeasure
branchScore

-- See 'eitherReader', but for an attoparsec parser.
eitherReadA :: AC.Parser a -> ReadM a
eitherReadA :: Parser a -> ReadM a
eitherReadA Parser a
p = (String -> Either String a) -> ReadM a
forall a. (String -> Either String a) -> ReadM a
eitherReader ((String -> Either String a) -> ReadM a)
-> (String -> Either String a) -> ReadM a
forall a b. (a -> b) -> a -> b
$ \String
input -> Parser a -> ByteString -> Either String a
forall a. Parser a -> ByteString -> Either String a
AC.parseOnly Parser a
p (String -> ByteString
BS.pack String
input)

distanceOpt :: Parser DistanceMeasure
distanceOpt :: Parser DistanceMeasure
distanceOpt =
  ReadM DistanceMeasure
-> Mod OptionFields DistanceMeasure -> Parser DistanceMeasure
forall a. ReadM a -> Mod OptionFields a -> Parser a
option (Parser DistanceMeasure -> ReadM DistanceMeasure
forall a. Parser a -> ReadM a
eitherReadA Parser DistanceMeasure
distanceParser) (Mod OptionFields DistanceMeasure -> Parser DistanceMeasure)
-> Mod OptionFields DistanceMeasure -> Parser DistanceMeasure
forall a b. (a -> b) -> a -> b
$
    String -> Mod OptionFields DistanceMeasure
forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"distance"
      Mod OptionFields DistanceMeasure
-> Mod OptionFields DistanceMeasure
-> Mod OptionFields DistanceMeasure
forall a. Semigroup a => a -> a -> a
<> Char -> Mod OptionFields DistanceMeasure
forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'd'
      Mod OptionFields DistanceMeasure
-> Mod OptionFields DistanceMeasure
-> Mod OptionFields DistanceMeasure
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields DistanceMeasure
forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"MEASURE"
      Mod OptionFields DistanceMeasure
-> Mod OptionFields DistanceMeasure
-> Mod OptionFields DistanceMeasure
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields DistanceMeasure
forall (f :: * -> *) a. String -> Mod f a
help
        String
"Type of distance to calculate (available distance measures are listed below)"

summaryStatisticsSwitch :: Parser Bool
summaryStatisticsSwitch :: Parser Bool
summaryStatisticsSwitch =
  Mod FlagFields Bool -> Parser Bool
switch (Mod FlagFields Bool -> Parser Bool)
-> Mod FlagFields Bool -> Parser Bool
forall a b. (a -> b) -> a -> b
$
    String -> Mod FlagFields Bool
forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"summary-statistics" Mod FlagFields Bool -> Mod FlagFields Bool -> Mod FlagFields Bool
forall a. Semigroup a => a -> a -> a
<> Char -> Mod FlagFields Bool
forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
's'
      Mod FlagFields Bool -> Mod FlagFields Bool -> Mod FlagFields Bool
forall a. Semigroup a => a -> a -> a
<> String -> Mod FlagFields Bool
forall (f :: * -> *) a. String -> Mod f a
help
        String
"Report summary statistics only"

normalizeSwitch :: Parser Bool
normalizeSwitch :: Parser Bool
normalizeSwitch =
  Mod FlagFields Bool -> Parser Bool
switch (Mod FlagFields Bool -> Parser Bool)
-> Mod FlagFields Bool -> Parser Bool
forall a b. (a -> b) -> a -> b
$
    String -> Mod FlagFields Bool
forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"normalize"
      Mod FlagFields Bool -> Mod FlagFields Bool -> Mod FlagFields Bool
forall a. Semigroup a => a -> a -> a
<> Char -> Mod FlagFields Bool
forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'n'
      Mod FlagFields Bool -> Mod FlagFields Bool -> Mod FlagFields Bool
forall a. Semigroup a => a -> a -> a
<> String -> Mod FlagFields Bool
forall (f :: * -> *) a. String -> Mod f a
help
        String
"Normalize trees before distance calculation; only affect distances depending on branch lengths"

intersectSwitch :: Parser Bool
intersectSwitch :: Parser Bool
intersectSwitch =
  Mod FlagFields Bool -> Parser Bool
switch (Mod FlagFields Bool -> Parser Bool)
-> Mod FlagFields Bool -> Parser Bool
forall a b. (a -> b) -> a -> b
$
    String -> Mod FlagFields Bool
forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"intersect"
      Mod FlagFields Bool -> Mod FlagFields Bool -> Mod FlagFields Bool
forall a. Semigroup a => a -> a -> a
<> Char -> Mod FlagFields Bool
forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
't'
      Mod FlagFields Bool -> Mod FlagFields Bool -> Mod FlagFields Bool
forall a. Semigroup a => a -> a -> a
<> String -> Mod FlagFields Bool
forall (f :: * -> *) a. String -> Mod f a
help
        String
"Compare intersections; i.e., before comparison, drop leaves that are not present in the other tree"

-- | Information about provided distance types.
distanceFooter :: [String]
distanceFooter :: [String]
distanceFooter =
  [ String
"Distance measures:",
    String
"  symmetric                Symmetric distance (Robinson-Foulds distance).",
    String
"  incompatible-split[VAL]  Incompatible split distance. Collapse branches with (normalized)",
    String
"                           support less than 0.0<=VAL<=1.0 before distance calculation;",
    String
"                           if, let's say, VAL>0.7, only well supported differences contribute",
    String
"                           to the total distance.",
    String
"  branch-score             Branch score distance."
  ]