-- | Functions for parsing, processing and visualization of taxonomy data.
--
-- === Usage example:
-- * Read in taxonomy data
--
--     > eitherTaxtree <- readNamedTaxonomy "/path/to/NCBI_taxonomydump_directory"
--
-- * Process data
--
--     > let subtree = extractTaxonomySubTreebyLevel [562] (fromRight eitherTaxtree) (Just 4)
--
-- * Visualize result
--
--     tput "/path/to/dotdirectory" subtree
module Biobase.Taxonomy.Import (  -- * Datatypes
                       -- Datatypes used to represent taxonomy data
                       module Biobase.Taxonomy.Types,
                       -- * Parsing
                       -- Functions prefixed with "read" read from filepaths, functions with parse from Haskell Strings.
                       readTaxonomy,
                       readNamedTaxonomy,
                       parseTaxonomy,
                       parseNCBITaxCitations,
                       readNCBITaxCitations,
                       parseNCBITaxDelNodes,
                       readNCBITaxDelNodes,
                       parseNCBITaxDivisions,
                       readNCBITaxDivisions,
                       parseNCBITaxGenCodes,
                       readNCBITaxGenCodes,
                       parseNCBITaxMergedNodes,
                       readNCBITaxMergedNodes,
                       parseNCBITaxNames,
                       readNCBITaxNames,
                       parseNCBITaxNodes,
                       readNCBITaxNodes,
                       parseNCBISimpleTaxons,
                       readNCBISimpleTaxons,
                       readNCBITaxonomyDatabase
                      ) where
import Prelude
import System.IO
import Biobase.Taxonomy.Types
import Text.Parsec.Prim (runP)
import Text.ParserCombinators.Parsec
import Control.Monad
import Data.List
import Data.Maybe
import qualified Data.Either.Unwrap as E
import Data.Graph.Inductive.Graph
import Data.Graph.Inductive.Tree
import qualified Data.ByteString.Char8 as B
import qualified Data.Text.Lazy as T
--------------------------------------------------------

---------------------------------------
-- Parsing functions

-- | Read an NCBI taxonomy dump directory and build a 'SimpleTaxon' graph whose
-- nodes carry their scientific names.
--
-- @names.dmp@ is parsed first with 'readNCBITaxNames'; if that fails the parse
-- error is returned and @nodes.dmp@ is not touched. Otherwise only the entries
-- accepted by 'isScientificName' are kept and handed to
-- 'genParserNamedTaxonomyGraph', which is run over @nodes.dmp@.
--
-- The directory path may be given with or without a trailing path separator.
readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double))
readNamedTaxonomy directoryPath = do
  parsedNames <- readNCBITaxNames (dumpFile "names.dmp")
  case parsedNames of
    -- Re-wrap the error: the Left payload is the same, the Right type differs.
    Left parseErr -> return (Left parseErr)
    Right allNames -> do
      let scientificNames = filter isScientificName allNames
      parseFromFileEncISO88591
        (genParserNamedTaxonomyGraph scientificNames)
        (dumpFile "nodes.dmp")
  where
    -- Directory prefix that is guaranteed to end in a separator (unless empty,
    -- in which case file names stay relative to the working directory).
    directoryPrefix
      | null directoryPath = directoryPath
      | last directoryPath `elem` "/\\" = directoryPath
      | otherwise = directoryPath ++ "/"
    dumpFile fileName = directoryPrefix ++ fileName

-- | True when the name class of the entry is exactly @"scientific name"@.
isScientificName :: TaxName -> Bool
isScientificName name = nameClass name == scientificNameT
  where
    scientificNameT = B.pack "scientific name"

-- | Parse the file at the given path with 'genParserTaxonomyGraph' and return
-- the resulting 'SimpleTaxon' graph. The file is read through
-- 'parseFromFileEncISO88591'. Scientific names are not filled in; use
-- 'readNamedTaxonomy' for a named graph.
readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double))
readTaxonomy = parseFromFileEncISO88591 genParserTaxonomyGraph

-- | Parse node records from an in-memory 'String' with
-- 'genParserTaxonomyGraph' and return the resulting 'SimpleTaxon' graph.
-- Parse errors are reported with the source name @"parseTaxonomy"@.
parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double)
parseTaxonomy = parse genParserTaxonomyGraph "parseTaxonomy"

-- | Parser that turns one or more node records into a graph.
--
-- Each record is parsed by 'genParserGraphNodeEdge', which yields a labelled
-- node together with the edge from that node to its parent. Edges whose two
-- endpoints are equal are dropped via 'notLoopEdge' before the graph is built.
genParserTaxonomyGraph :: GenParser Char st (Gr SimpleTaxon Double)
genParserTaxonomyGraph = do
  -- 'try' lets 'many1' stop cleanly at the first record that does not parse.
  nodeEdgePairs <- many1 (try genParserGraphNodeEdge)
  -- End of input is accepted but not required after the last record.
  optional eof
  let (taxNodes, allEdges) = unzip nodeEdgePairs
      taxEdges = filter notLoopEdge allEdges
  return (mkGraph taxNodes taxEdges)


-- | True when the edge connects two different nodes, i.e. it is not a
-- self-loop. The edge label is ignored.
notLoopEdge :: (Int,Int,a) -> Bool
notLoopEdge (fromNode, toNode, _) = fromNode /= toNode

--genParserNodeEdges :: [TaxName] -> GenParser Char st [(Int,SimpleTaxon),(Int,Int,Double)]
--genParserNodeEdges = do
--  nodesEdges <- (many1 (try genParserGraphNodeEdge))
--  optional eof
--  return (nodesList,edgesList)


  --let taxedges = filter notLoopEdge edgesList
  --let taxnamednodes = map (setNodeScientificName filteredNodeNames) nodesList
  --let currentGraph = mkGraph taxnamednodes taxedges
  --return currentGraph

-- | Like 'genParserTaxonomyGraph', but every parsed node additionally gets its
-- scientific name set from the supplied list of names via
-- 'setNodeScientificName'.
--
-- The caller is expected to pass only the names that should be used for
-- labelling (see 'readNamedTaxonomy', which pre-filters with
-- 'isScientificName').
genParserNamedTaxonomyGraph :: [TaxName] -> GenParser Char st (Gr SimpleTaxon Double)
genParserNamedTaxonomyGraph filteredNodeNames = do
  -- 'try' lets 'many1' stop cleanly at the first record that does not parse.
  nodeEdgePairs <- many1 (try genParserGraphNodeEdge)
  -- End of input is accepted but not required after the last record.
  optional eof
  let (plainNodes, allEdges) = unzip nodeEdgePairs
      taxEdges = filter notLoopEdge allEdges
      namedNodes = map (setNodeScientificName filteredNodeNames) plainNodes
  return (mkGraph namedNodes taxEdges)

-- | Set the scientific name of the taxon in a labelled node.
--
-- The first entry of the name list whose tax id matches the taxon (see
-- 'isTaxNameIdSimpleTaxid') supplies the name; if none matches, the name is
-- set to @"no name"@. The node identifier is passed through unchanged.
--
-- Note: the lookup is a linear scan of the name list for every node.
setNodeScientificName :: [TaxName] -> (t, SimpleTaxon) -> (t, SimpleTaxon)
setNodeScientificName inputTaxNames (inputNode, inputTaxon) =
  (inputNode, inputTaxon { simpleScientificName = resolvedName })
  where
    matchingName = find (isTaxNameIdSimpleTaxid inputTaxon) inputTaxNames
    resolvedName = maybe (T.pack "no name") nameTxt matchingName

-- | True when the name entry refers to the given taxon, i.e. their tax ids
-- are equal.
isTaxNameIdSimpleTaxid :: SimpleTaxon -> TaxName -> Bool
isTaxNameIdSimpleTaxid inputTaxon inputTaxName =
  nameTaxId inputTaxName == simpleTaxId inputTaxon


-- | Parse one node record (a single line) into a labelled graph node and the
-- edge pointing from that node to its parent.
--
-- Only the first three @\\t|\\t@-separated fields are interpreted: tax id,
-- parent tax id and rank. The remainder of the line is consumed and ignored.
-- The node starts with an empty scientific name and the edge has weight 1.
genParserGraphNodeEdge :: GenParser Char st ((Int,SimpleTaxon),(Int,Int,Double))
genParserGraphNodeEdge = do
  taxIdDigits <- many1 digit
  _ <- string "\t|\t"
  parentTaxIdDigits <- many1 digit
  _ <- string "\t|\t"
  rankField <- many1 (noneOf "\t")
  -- Skip every remaining field; at least one character must follow the rank.
  _ <- many1 (noneOf "\n")
  _ <- char '\n'
  let taxIdInt = readInt taxIdDigits
      parentTaxIdInt = readInt parentTaxIdDigits
      taxon = SimpleTaxon taxIdInt T.empty parentTaxIdInt (readRank rankField)
  return ((taxIdInt, taxon), (taxIdInt, parentTaxIdInt, 1 :: Double))

-- | Parse a list of 'TaxCitation' records from an input 'String'.
-- Parse errors are reported with the source name @"parseTaxCitations"@.
parseNCBITaxCitations :: String -> Either ParseError [TaxCitation]
parseNCBITaxCitations = parse genParserNCBITaxCitations "parseTaxCitations"

-- | Parse a list of 'TaxCitation' records from the file at the given path.
-- The file is read through 'parseFromFileEncISO88591'.
readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation])
readNCBITaxCitations = parseFromFileEncISO88591 genParserNCBITaxCitations

-- | Parse a list of 'TaxDelNode' records from an input 'String'.
-- Parse errors are reported with the source name @"parseTaxDelNodes"@.
parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode]
parseNCBITaxDelNodes = parse genParserNCBITaxDelNodes "parseTaxDelNodes"

-- | Parse a list of 'TaxDelNode' records from the file at the given path.
readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode])
readNCBITaxDelNodes = parseFromFile genParserNCBITaxDelNodes

-- | Parse a list of 'TaxDivision' records from an input 'String'.
-- Parse errors are reported with the source name @"parseTaxDivisons"@.
parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision]
parseNCBITaxDivisions = parse genParserNCBITaxDivisons "parseTaxDivisons"

-- | Parse a list of 'TaxDivision' records from the file at the given path.
readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision])
readNCBITaxDivisions = parseFromFile genParserNCBITaxDivisons

-- | Parse a list of 'TaxGenCode' records from an input 'String'.
-- Parse errors are reported with the source name @"parseTaxGenCodes"@.
parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode]
parseNCBITaxGenCodes = parse genParserNCBITaxGenCodes "parseTaxGenCodes"

-- | Parse a list of 'TaxGenCode' records from the file at the given path.
readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode])
readNCBITaxGenCodes = parseFromFile genParserNCBITaxGenCodes

-- | Parse a list of 'TaxMergedNode' records from an input 'String'.
-- Parse errors are reported with the source name @"parseTaxMergedNodes"@.
parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode]
parseNCBITaxMergedNodes = parse genParserNCBITaxMergedNodes "parseTaxMergedNodes"

-- | Parse a list of 'TaxMergedNode' records from the file at the given path.
readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode])
readNCBITaxMergedNodes = parseFromFile genParserNCBITaxMergedNodes

-- | Parse a list of 'TaxName' records from an input 'String'.
-- Parse errors are reported with the source name @"parseTaxNames"@.
parseNCBITaxNames :: String -> Either ParseError [TaxName]
parseNCBITaxNames = parse genParserNCBITaxNames "parseTaxNames"

-- | Parse a list of 'TaxName' records from the file at the given path.
readNCBITaxNames :: String -> IO (Either ParseError [TaxName])
readNCBITaxNames = parseFromFile genParserNCBITaxNames

-- | Parse a single 'TaxNode' record from an input 'String'.
--
-- Despite the plural name, this runs 'genParserNCBITaxNode' once and returns
-- one node, not a list. Parse errors are reported with the source name
-- @"parseTaxNode"@.
parseNCBITaxNodes :: String -> Either ParseError TaxNode
parseNCBITaxNodes = parse genParserNCBITaxNode "parseTaxNode"

-- | Parse a list of 'TaxNode' records from the file at the given path.
readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode])
readNCBITaxNodes = parseFromFile genParserNCBITaxNodes

-- | Parse a single 'SimpleTaxon' record from an input 'String'.
--
-- Despite the plural name, this runs 'genParserNCBISimpleTaxon' once and
-- returns one taxon, not a list. Parse errors are reported with the source
-- name @"parseSimpleTaxon"@.
parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon
parseNCBISimpleTaxons = parse genParserNCBISimpleTaxon "parseSimpleTaxon"

-- | Parse a list of 'SimpleTaxon' records from the file at the given path.
readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon])
readNCBISimpleTaxons = parseFromFile genParserNCBISimpleTaxons

-- | Read all dump files of an NCBI taxonomy directory and combine them into an
-- 'NCBITaxDump'.
--
-- The files @citations.dmp@, @delnodes.dmp@, @division.dmp@, @gencode.dmp@,
-- @merged.dmp@, @names.dmp@ and @nodes.dmp@ are parsed in that order. For each
-- result an error string is derived with 'extractParseError'; the list of
-- these strings and the individual results are passed to 'checkParsing',
-- which decides whether the overall result is a 'Left' or a 'Right'.
--
-- The folder path may be given with or without a trailing path separator.
readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump)
readNCBITaxonomyDatabase folder = do
  citations <- readNCBITaxCitations (dumpFile "citations.dmp")
  taxdelNodes <- readNCBITaxDelNodes (dumpFile "delnodes.dmp")
  divisions <- readNCBITaxDivisions (dumpFile "division.dmp")
  genCodes <- readNCBITaxGenCodes (dumpFile "gencode.dmp")
  mergedNodes <- readNCBITaxMergedNodes (dumpFile "merged.dmp")
  names <- readNCBITaxNames (dumpFile "names.dmp")
  taxnodes <- readNCBITaxNodes (dumpFile "nodes.dmp")
  -- One entry per dump file, in the same order as the files were read.
  let parseErrors =
        [ extractParseError citations
        , extractParseError taxdelNodes
        , extractParseError divisions
        , extractParseError genCodes
        , extractParseError mergedNodes
        , extractParseError names
        , extractParseError taxnodes
        ]
  return
    (checkParsing parseErrors citations taxdelNodes divisions genCodes mergedNodes names taxnodes)
  where
    -- Folder prefix that is guaranteed to end in a separator (unless empty,
    -- in which case file names stay relative to the working directory).
    folderPrefix
      | null folder = folder
      | last folder `elem` "/\\" = folder
      | otherwise = folder ++ "/"
    dumpFile fileName = folderPrefix ++ fileName

-- | One or more citation records, each parsed by 'genParserNCBITaxCitation'.
-- Fails if not even one record can be parsed.
genParserNCBITaxCitations :: GenParser Char st [TaxCitation]
genParserNCBITaxCitations = many1 genParserNCBITaxCitation

-- | One or more deleted-node records, each parsed by
-- 'genParserNCBITaxDelNode'. Fails if not even one record can be parsed.
genParserNCBITaxDelNodes :: GenParser Char st [TaxDelNode]
genParserNCBITaxDelNodes = many1 genParserNCBITaxDelNode

-- | One or more division records, each parsed by 'genParserNCBITaxDivision'.
-- Fails if not even one record can be parsed.
genParserNCBITaxDivisons :: GenParser Char st [TaxDivision]
genParserNCBITaxDivisons = many1 genParserNCBITaxDivision

-- | One or more genetic-code records, each parsed by
-- 'genParserNCBITaxGenCode'. Fails if not even one record can be parsed.
genParserNCBITaxGenCodes :: GenParser Char st [TaxGenCode]
genParserNCBITaxGenCodes = many1 genParserNCBITaxGenCode


-- | One or more merged-node records, each parsed by
-- 'genParserNCBITaxMergedNode'. Fails if not even one record can be parsed.
genParserNCBITaxMergedNodes :: GenParser Char st [TaxMergedNode]
genParserNCBITaxMergedNodes = many1 genParserNCBITaxMergedNode


-- | One or more name records, each parsed by 'genParserNCBITaxName'.
-- Fails if not even one record can be parsed.
genParserNCBITaxNames :: GenParser Char st [TaxName]
genParserNCBITaxNames = many1 genParserNCBITaxName

-- | One or more node records, each parsed by 'genParserNCBITaxNode'.
-- Fails if not even one record can be parsed.
genParserNCBITaxNodes :: GenParser Char st [TaxNode]
genParserNCBITaxNodes = many1 genParserNCBITaxNode

-- | One or more simple taxon records, each parsed by
-- 'genParserNCBISimpleTaxon'. Fails if not even one record can be parsed.
genParserNCBISimpleTaxons :: GenParser Char st [SimpleTaxon]
genParserNCBISimpleTaxons = many1 genParserNCBISimpleTaxon


-- | Parse one citation record (a single line terminated by @\\t|\\n@).
--
-- Fields, in order and separated by @\\t|\\t@: citation id (digits), citation
-- key (may be empty), optional PubMed id (digits), optional Medline id
-- (digits), URL (parsed by 'genParserTaxURL'), free text (may be empty) and
-- a list of tax ids (each parsed by 'genParserTaxIdList').
genParserNCBITaxCitation :: GenParser Char st TaxCitation
genParserNCBITaxCitation = do
  citIdDigits <- many1 digit
  _ <- string "\t|\t"
  citKeyField <- many (noneOf "\t")
  _ <- string "\t|\t"
  pubmedDigits <- optionMaybe (many1 digit)
  _ <- string "\t|\t"
  medlineDigits <- optionMaybe (many1 digit)
  -- The URL field is framed by the bare separators; the surrounding
  -- whitespace is left to 'genParserTaxURL'.
  _ <- tab
  _ <- char '|'
  urlField <- genParserTaxURL
  _ <- char '|'
  _ <- tab
  textField <- many (noneOf "\t")
  _ <- string "\t|\t"
  taxIds <- many genParserTaxIdList
  _ <- string "\t|\n"
  return $
    TaxCitation
      (readInt citIdDigits)
      (B.pack citKeyField)
      (fmap readInt pubmedDigits)
      (fmap readInt medlineDigits)
      urlField
      (B.pack textField)
      taxIds

-- | Parser for one deleted-node record.
--
-- Accepts one or more digits (the node id), followed by a single
-- whitespace character, a @|@ and a newline. The digits are converted
-- with 'readInt' and wrapped in 'TaxDelNode'.
genParserNCBITaxDelNode :: GenParser Char st TaxDelNode
genParserNCBITaxDelNode = do
  deletedId <- many1 digit
  -- Trailing record terminator: whitespace, pipe, line break.
  _ <- space >> char '|' >> char '\n'
  return (TaxDelNode (readInt deletedId))

-- | Parser for one division record.
--
-- Columns are separated by @\"\\t|\\t\"@ and the record is terminated by
-- @\"\\t|\\n\"@:
--
-- 1. division id (one or more digits)
-- 2. division code (one or more upper-case letters)
-- 3. division name (one or more non-tab characters)
-- 4. comments (zero or more non-tab characters)
--
-- The comments column is allowed to be empty, matching how the comments
-- column is handled in 'genParserNCBITaxNode'; requiring at least one
-- character made every record without a comment a parse error.
genParserNCBITaxDivision :: GenParser Char st TaxDivision
genParserNCBITaxDivision = do
  _divisionId <- many1 digit
  _ <- string "\t|\t"
  _divisionCDE <- many1 upper
  _ <- string "\t|\t"
  _divisionName <- many1 (noneOf "\t")
  _ <- string "\t|\t"
  -- 'many' rather than 'many1': an empty comment is a valid value.
  _comments <- many (noneOf "\t")
  _ <- string "\t|\n"
  return $ TaxDivision (readInt _divisionId) (B.pack _divisionCDE) (B.pack _divisionName) (B.pack _comments)

-- | Parser for one genetic-code record.
--
-- Columns are separated by @\"\\t|\\t\"@ and the record is terminated by
-- @\"\\t|\\n\"@:
--
-- 1. genetic code id (one or more digits)
-- 2. abbreviation (zero or more non-tab characters)
-- 3. genetic code name (one or more non-tab characters)
-- 4. translation table (zero or more non-tab characters)
-- 5. start codons (zero or more non-tab characters)
--
-- The abbreviation, translation table and start codon columns may be
-- empty; requiring at least one character in them rejected records that
-- leave those columns blank.
genParserNCBITaxGenCode :: GenParser Char st TaxGenCode
genParserNCBITaxGenCode = do
  _geneticCodeId <- many1 digit
  _ <- string "\t|\t"
  -- 'many' rather than 'many1': the column may be blank.
  _abbreviation <- many (noneOf "\t")
  _ <- string "\t|\t"
  _genCodeName <- many1 (noneOf "\t")
  _ <- string "\t|\t"
  _cde <- many (noneOf "\t")
  _ <- string "\t|\t"
  _starts <- many (noneOf "\t")
  _ <- string "\t|\n"
  return $ TaxGenCode (readInt _geneticCodeId) (B.pack _abbreviation) (B.pack _genCodeName) (B.pack _cde) (B.pack _starts)

-- | Parser for one merged-node record.
--
-- Reads two digit sequences separated by @\"\\t|\\t\"@ and terminated by
-- @\"\\t|\\n\"@; both are converted with 'readInt' and passed, in order,
-- to 'TaxMergedNode'.
genParserNCBITaxMergedNode :: GenParser Char st TaxMergedNode
genParserNCBITaxMergedNode = do
  oldIdStr <- many1 digit
  _ <- string "\t|\t"
  newIdStr <- many1 digit
  _ <- string "\t|\n"
  return (TaxMergedNode (readInt oldIdStr) (readInt newIdStr))

-- | Parser for one taxonomy-name record.
--
-- Columns are separated by @\"\\t|\\t\"@; the record ends with a tab, a
-- @|@ and a newline:
--
-- 1. taxonomy id (one or more digits)
-- 2. name text (one or more characters, no tab or newline)
-- 3. unique name (zero or more characters, no tab or newline)
-- 4. name class (one or more characters, no tab or newline)
--
-- The resulting 'TaxName' is forced to weak head normal form via '$!'.
genParserNCBITaxName :: GenParser Char st TaxName
genParserNCBITaxName = do
  taxIdStr <- column (many1 digit)
  nameText <- column (many1 (noneOf "\t\n"))
  uniqueNameStr <- column (many (noneOf "\t\n"))
  nameClassStr <- many1 (noneOf "\t\n")
  _ <- tab >> char '|' >> newline
  return $! TaxName (readInt taxIdStr) (T.pack nameText) (B.pack uniqueNameStr) (B.pack nameClassStr)
  where
    -- Run a column parser, then consume the column separator.
    column p = do
      value <- p
      _ <- string "\t|\t"
      return value

-- | Parser for one node record, keeping only id, parent id and rank.
--
-- Reads the first three @\"\\t|\\t\"@-separated columns (digits, digits,
-- non-tab characters), then skips everything up to and including the
-- next newline. The name field of the resulting 'SimpleTaxon' is set to
-- the empty text. The result is forced to weak head normal form via '$!'.
genParserNCBISimpleTaxon :: GenParser Char st SimpleTaxon
genParserNCBISimpleTaxon = do
  taxIdStr <- column (many1 digit)
  parentIdStr <- column (many1 digit)
  rankStr <- many1 (noneOf "\t")
  -- Discard the remainder of the line (at least one character is required).
  _ <- many1 (noneOf "\n") >> char '\n'
  return $! SimpleTaxon (readInt taxIdStr) T.empty (readInt parentIdStr) (readRank rankStr)
  where
    -- Run a column parser, then consume the column separator.
    column p = do
      value <- p
      _ <- string "\t|\t"
      return value

-- | Parser for one full node record.
--
-- Thirteen columns are read, separated by @\"\\t|\\t\"@; the record ends
-- with a tab, a @|@ and a newline. In order:
--
-- 1. taxonomy id (digits)
-- 2. parent taxonomy id (digits)
-- 3. rank (one or more non-tab characters, converted with 'readRank')
-- 4. EMBL code (zero or more non-tab characters)
-- 5. division id (digits)
-- 6. inherited division flag (digits, converted with 'readBool')
-- 7. genetic code id (digits)
-- 8. inherited genetic code flag (digits, converted with 'readBool')
-- 9. mitochondrial genetic code id (digits)
-- 10. inherited mitochondrial genetic code flag (digits, 'readBool')
-- 11. GenBank hidden flag (digits, 'readBool')
-- 12. hidden subtree root flag (digits, 'readBool')
-- 13. comments (zero or more non-tab characters)
genParserNCBITaxNode :: GenParser Char st TaxNode
genParserNCBITaxNode = do
  taxIdStr <- column (many1 digit)
  parentIdStr <- column (many1 digit)
  rankStr <- column (many1 (noneOf "\t"))
  emblStr <- column (many (noneOf "\t"))
  divisionStr <- column (many1 digit)
  inheritedDivStr <- column (many1 digit)
  genCodeStr <- column (many1 digit)
  inheritedGCStr <- column (many1 digit)
  mitoGenCodeStr <- column (many1 digit)
  inheritedMGCStr <- column (many1 digit)
  genBankHiddenStr <- column (many1 digit)
  hiddenSubtreeStr <- column (many1 digit)
  commentStr <- many (noneOf "\t")
  _ <- tab >> char '|' >> char '\n'
  return $
    TaxNode
      (readInt taxIdStr)
      (readInt parentIdStr)
      (readRank rankStr)
      (B.pack emblStr)
      (readInt divisionStr)
      (readBool inheritedDivStr)
      (readInt genCodeStr)
      (readBool inheritedGCStr)
      (readInt mitoGenCodeStr)
      (readBool inheritedMGCStr)
      (readBool genBankHiddenStr)
      (readBool hiddenSubtreeStr)
      (B.pack commentStr)
  where
    -- Run a column parser, then consume the column separator.
    column p = do
      value <- p
      _ <- string "\t|\t"
      return value

---------------------------------------
-- Auxiliary functions
-- | Convert a string to an 'Int' using 'read'.
--
-- Like 'read', this is partial: a string that is not a valid 'Int'
-- literal raises a runtime error.
readInt :: String -> Int
readInt digits = read digits

-- | Interpret a flag column: @\"1\"@ is 'True', every other string
-- (including @\"0\"@) is 'False'.
readBool :: String -> Bool
readBool flag = flag == "1"

-- | Convert a string to a 'Rank' through the 'Read' instance of 'Rank'.
--
-- Like 'read', this is partial: input not accepted by that instance
-- raises a runtime error.
readRank :: String -> Rank
readRank = read

-- | Parser for a single taxonomy id inside a space-separated id list.
--
-- Accepts one or more digits, optionally preceded and/or followed by a
-- single space character, and returns the digits converted with 'readInt'.
genParserTaxIdList :: GenParser Char st Int
genParserTaxIdList = fmap readInt (between padding padding (many1 digit))
  where
    -- At most one blank on either side of the number.
    padding = optional (char ' ')

-- | Parser for the URL column of a citation record.
--
-- Consumes a tab, then all characters up to the next tab, then that tab,
-- then all characters up to (but not including) the next @|@. The two
-- character runs are concatenated and packed into a 'B.ByteString'.
genParserTaxURL :: GenParser Char st B.ByteString
genParserTaxURL = do
  firstPart <- tab >> many (noneOf "\t")
  secondPart <- tab >> many (noneOf "|")
  return (B.pack (firstPart ++ secondPart))

-- | Combine two optional URL fragments.
--
-- * Both present: the fragments are appended.
-- * Only the first present: the first is returned unchanged.
-- * First absent: 'Nothing', regardless of the second.
concatenateURLParts :: Maybe String -> Maybe String -> Maybe String
concatenateURLParts (Just firstPart) (Just secondPart) = Just (firstPart ++ secondPart)
concatenateURLParts onlyFirst@(Just _) Nothing = onlyFirst
concatenateURLParts Nothing _ = Nothing

-- | Append two optional strings; the result is 'Nothing' unless both
-- arguments are present.
maybeStringConcat :: Maybe String -> Maybe String -> Maybe String
maybeStringConcat leftPart rightPart = do
  l <- leftPart
  r <- rightPart
  return (l ++ r)

-- | Open a file for reading, set the given text encoding on its handle
-- and return its contents via 'hGetContents' (which reads lazily).
readEncodedFile :: TextEncoding -> FilePath -> IO String
readEncodedFile encoding name =
  openFile name ReadMode >>= \fileHandle ->
    hSetEncoding fileHandle encoding >> hGetContents fileHandle

-- | Run a parser over the contents of a file decoded as 'latin1'
-- (ISO-8859-1).
--
-- The file is read with 'readEncodedFile'; the file name is also passed
-- to 'runP' as the source name used in parse error positions.
parseFromFileEncISO88591 :: Parser a -> String -> IO (Either ParseError a)
parseFromFileEncISO88591 parser fname =
  fmap (runP parser () fname) (readEncodedFile latin1 fname)

-- | Assemble an 'NCBITaxDump' from the individual parse results.
--
-- The first argument is a list of error messages, one per parsed file,
-- where the empty string stands for \"no error\". If every message is
-- empty the seven parse results are unwrapped with 'E.fromRight' and
-- combined into 'Right' 'NCBITaxDump'; otherwise the message list is
-- returned unchanged as 'Left'.
--
-- NOTE(review): the results are unwrapped with 'E.fromRight' without
-- inspecting them, so the message list is presumably expected to be
-- derived from the same results (e.g. via 'extractParseError').
checkParsing
  :: [String]
  -> Either ParseError [TaxCitation]
  -> Either ParseError [TaxDelNode]
  -> Either ParseError [TaxDivision]
  -> Either ParseError [TaxGenCode]
  -> Either ParseError [TaxMergedNode]
  -> Either ParseError [TaxName]
  -> Either ParseError [TaxNode]
  -> Either [String] NCBITaxDump
checkParsing parseErrors citations taxdelNodes divisons genCodes mergedNodes names taxnodes
  | all null parseErrors = Right taxDump
  | otherwise = Left parseErrors
  where
    taxDump =
      NCBITaxDump
        (E.fromRight citations)
        (E.fromRight taxdelNodes)
        (E.fromRight divisons)
        (E.fromRight genCodes)
        (E.fromRight mergedNodes)
        (E.fromRight names)
        (E.fromRight taxnodes)

-- | Render the error of a parse result as a string.
--
-- A 'Left' yields the 'show' representation of its 'ParseError'; a
-- 'Right' yields the empty string.
extractParseError :: Either ParseError a -> String
extractParseError = either show (const "")