-- |
-- Module      :  TLynx.Parsers
-- Description :  Parse Newick/Nexus tree files
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Wed Apr 22 13:34:35 2020.
module TLynx.Parsers
  ( parseTree,
    parseTrees,
    NewickFormat,
    newickFormat,
    newickHelp,
  )
where

import Data.List
import ELynx.Tools
import ELynx.Tree
import Options.Applicative

printError :: String -> String -> String -> IO a
printError :: String -> String -> String -> IO a
printError String
fn String
new String
nex = do
  String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Error of Newick parser: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
new String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
  String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Error of Nexus  parser: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
nex String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
  String -> IO a
forall a. HasCallStack => String -> a
error (String -> IO a) -> String -> IO a
forall a b. (a -> b) -> a -> b
$ String
"Could not read tree file: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
fn String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."

-- | Parse a Newick tree file or a Nexus file with Newick trees.
--
-- Error if no or more than one trees are found.
-- Error if both file formats fail to parse.
parseTree :: NewickFormat -> FilePath -> IO (Tree Phylo Name)
parseTree :: NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
fmt String
fn = do
  Either String (Tree Phylo Name)
parseResultNewick <- Parser (Tree Phylo Name)
-> String -> IO (Either String (Tree Phylo Name))
forall a. Parser a -> String -> IO (Either String a)
runParserOnFile (NewickFormat -> Parser (Tree Phylo Name)
oneNewick NewickFormat
fmt) String
fn
  case Either String (Tree Phylo Name)
parseResultNewick of
    Right Tree Phylo Name
r -> Tree Phylo Name -> IO (Tree Phylo Name)
forall (m :: * -> *) a. Monad m => a -> m a
return Tree Phylo Name
r
    Left String
eNewick -> do
      Either String [(ByteString, Tree Phylo Name)]
parseResultNexus <- Parser [(ByteString, Tree Phylo Name)]
-> String -> IO (Either String [(ByteString, Tree Phylo Name)])
forall a. Parser a -> String -> IO (Either String a)
runParserOnFile (NewickFormat -> Parser [(ByteString, Tree Phylo Name)]
nexusTrees NewickFormat
fmt) String
fn
      case Either String [(ByteString, Tree Phylo Name)]
parseResultNexus of
        Right [] -> String -> IO (Tree Phylo Name)
forall a. HasCallStack => String -> a
error (String -> IO (Tree Phylo Name)) -> String -> IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ String
"No tree found in Nexus file " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
fn String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
        Right [(ByteString
_, Tree Phylo Name
t)] -> Tree Phylo Name -> IO (Tree Phylo Name)
forall (m :: * -> *) a. Monad m => a -> m a
return Tree Phylo Name
t
        Right [(ByteString, Tree Phylo Name)]
_ -> String -> IO (Tree Phylo Name)
forall a. HasCallStack => String -> a
error (String -> IO (Tree Phylo Name)) -> String -> IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ String
"More than one tree found in Nexus file " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
fn String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
        Left String
eNexus -> String -> String -> String -> IO (Tree Phylo Name)
forall a. String -> String -> String -> IO a
printError String
fn String
eNewick String
eNexus

-- | Parse a Newick tree file or a Nexus file with Newick trees.
--
-- Error if both file formats fail to parse.
parseTrees :: NewickFormat -> FilePath -> IO (Forest Phylo Name)
parseTrees :: NewickFormat -> String -> IO (Forest Phylo Name)
parseTrees NewickFormat
fmt String
fn = do
  Either String (Forest Phylo Name)
parseResultNewick <- Parser (Forest Phylo Name)
-> String -> IO (Either String (Forest Phylo Name))
forall a. Parser a -> String -> IO (Either String a)
runParserOnFile (NewickFormat -> Parser (Forest Phylo Name)
someNewick NewickFormat
fmt) String
fn
  case Either String (Forest Phylo Name)
parseResultNewick of
    Right Forest Phylo Name
r -> Forest Phylo Name -> IO (Forest Phylo Name)
forall (m :: * -> *) a. Monad m => a -> m a
return Forest Phylo Name
r
    Left String
eNewick -> do
      Either String [(ByteString, Tree Phylo Name)]
parseResultNexus <- Parser [(ByteString, Tree Phylo Name)]
-> String -> IO (Either String [(ByteString, Tree Phylo Name)])
forall a. Parser a -> String -> IO (Either String a)
runParserOnFile (NewickFormat -> Parser [(ByteString, Tree Phylo Name)]
nexusTrees NewickFormat
fmt) String
fn
      case Either String [(ByteString, Tree Phylo Name)]
parseResultNexus of
        Right [(ByteString, Tree Phylo Name)]
r -> Forest Phylo Name -> IO (Forest Phylo Name)
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name -> IO (Forest Phylo Name))
-> Forest Phylo Name -> IO (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ ((ByteString, Tree Phylo Name) -> Tree Phylo Name)
-> [(ByteString, Tree Phylo Name)] -> Forest Phylo Name
forall a b. (a -> b) -> [a] -> [b]
map (ByteString, Tree Phylo Name) -> Tree Phylo Name
forall a b. (a, b) -> b
snd [(ByteString, Tree Phylo Name)]
r
        Left String
eNexus -> String -> String -> String -> IO (Forest Phylo Name)
forall a. String -> String -> String -> IO a
printError String
fn String
eNewick String
eNexus

-- | Parse 'NewickFormat'.
newickFormat :: Parser NewickFormat
newickFormat :: Parser NewickFormat
newickFormat =
  ReadM NewickFormat
-> Mod OptionFields NewickFormat -> Parser NewickFormat
forall a. ReadM a -> Mod OptionFields a -> Parser a
option ReadM NewickFormat
forall a. Read a => ReadM a
auto (Mod OptionFields NewickFormat -> Parser NewickFormat)
-> Mod OptionFields NewickFormat -> Parser NewickFormat
forall a b. (a -> b) -> a -> b
$
    String -> Mod OptionFields NewickFormat
forall (f :: * -> *) a. HasName f => String -> Mod f a
long String
"newick-format"
      Mod OptionFields NewickFormat
-> Mod OptionFields NewickFormat -> Mod OptionFields NewickFormat
forall a. Semigroup a => a -> a -> a
<> Char -> Mod OptionFields NewickFormat
forall (f :: * -> *) a. HasName f => Char -> Mod f a
short Char
'f'
      Mod OptionFields NewickFormat
-> Mod OptionFields NewickFormat -> Mod OptionFields NewickFormat
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields NewickFormat
forall (f :: * -> *) a. HasMetavar f => String -> Mod f a
metavar String
"FORMAT"
      Mod OptionFields NewickFormat
-> Mod OptionFields NewickFormat -> Mod OptionFields NewickFormat
forall a. Semigroup a => a -> a -> a
<> NewickFormat -> Mod OptionFields NewickFormat
forall (f :: * -> *) a. HasValue f => a -> Mod f a
value NewickFormat
Standard
      Mod OptionFields NewickFormat
-> Mod OptionFields NewickFormat -> Mod OptionFields NewickFormat
forall a. Semigroup a => a -> a -> a
<> String -> Mod OptionFields NewickFormat
forall (f :: * -> *) a. String -> Mod f a
help
        ( String
"Newick tree format: "
            String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
nwlist
            String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"; default: Standard; for detailed help, see 'tlynx --help'"
        )
  where
    nwfs :: [String]
nwfs = (NewickFormat -> String) -> [NewickFormat] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map NewickFormat -> String
forall a. Show a => a -> String
show ([NewickFormat]
forall a. (Bounded a, Enum a) => [a]
allValues :: [NewickFormat])
    nwlist :: String
nwlist = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
", " ([String] -> [String]
forall a. [a] -> [a]
init [String]
nwfs) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
", or " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> [String] -> String
forall a. [a] -> a
last [String]
nwfs

-- | Help for different 'NewickFormat's.
newickHelp :: [String]
newickHelp :: [String]
newickHelp =
  (NewickFormat -> String) -> [NewickFormat] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map
    (String -> String
toListItem (String -> String)
-> (NewickFormat -> String) -> NewickFormat -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NewickFormat -> String
describeNewickFormat)
    ([NewickFormat]
forall a. (Bounded a, Enum a) => [a]
allValues :: [NewickFormat])
    [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String
"- Nexus file including Newick trees"]
  where
    toListItem :: String -> String
toListItem = (String
"- Newick " String -> String -> String
forall a. [a] -> [a] -> [a]
++)

-- Short description of the supported Newick formats.
describeNewickFormat :: NewickFormat -> String
describeNewickFormat :: NewickFormat -> String
describeNewickFormat NewickFormat
Standard =
  String
"Standard: Branch support values are stored in square brackets after branch lengths."
describeNewickFormat NewickFormat
IqTree =
  String
"IqTree:   Branch support values are stored as node names after the closing bracket of forests."
describeNewickFormat NewickFormat
RevBayes =
  String
"RevBayes: Key-value pairs is provided in square brackets after node names as well as branch lengths. XXX: Key value pairs are ignored at the moment."