module Biobase.Taxonomy.Import (
module Biobase.Taxonomy.Types,
readTaxonomy,
readNamedTaxonomy,
parseTaxonomy,
parseNCBITaxCitations,
readNCBITaxCitations,
parseNCBITaxDelNodes,
readNCBITaxDelNodes,
parseNCBITaxDivisions,
readNCBITaxDivisions,
parseNCBITaxGenCodes,
readNCBITaxGenCodes,
parseNCBITaxMergedNodes,
readNCBITaxMergedNodes,
parseNCBITaxNames,
readNCBITaxNames,
parseNCBITaxNodes,
readNCBITaxNodes,
parseNCBISimpleTaxons,
readNCBISimpleTaxons,
readNCBITaxonomyDatabase
) where
import Prelude
import System.IO
import Biobase.Taxonomy.Types
import Text.Parsec.Prim (runP)
import Text.ParserCombinators.Parsec
import Control.Monad
import Data.List
import Data.Maybe
import qualified Data.Either.Unwrap as E
import Data.Graph.Inductive.Graph
import Data.Graph.Inductive.Tree
import qualified Data.ByteString.Char8 as B
import qualified Data.Text.Lazy as T
-- | Read an NCBI taxonomy dump directory into a graph whose node labels carry
-- scientific names.
--
-- @names.dmp@ is parsed first; a parse failure there is returned unchanged.
-- Otherwise only the names whose class is @scientific name@ are kept and
-- handed to the parser that is run on @nodes.dmp@.
--
-- The file names are appended to @directoryPath@ with plain '++', so the
-- path has to end with a path separator.
readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double))
readNamedTaxonomy directoryPath = do
  nodeNames <- readNCBITaxNames (directoryPath ++ "names.dmp")
  -- Pattern matching replaces the isLeft / fromLeft / fromRight combination,
  -- so no partial extraction function is involved.
  case nodeNames of
    Left parseError -> return (Left parseError)
    Right taxNames ->
      parseFromFileEncISO88591
        (genParserNamedTaxonomyGraph (filter isScientificName taxNames))
        (directoryPath ++ "nodes.dmp")
-- | 'True' when the name class of the entry equals @scientific name@.
isScientificName :: TaxName -> Bool
isScientificName name = nameClass name == B.pack "scientific name"
-- | Parse the file at the given path with 'genParserTaxonomyGraph'.
--
-- The file is read through @parseFromFileEncISO88591@
-- (NOTE(review): presumably decodes the file as ISO-8859-1 - confirm against
-- that helper's definition).
readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double))
readTaxonomy = parseFromFileEncISO88591 genParserTaxonomyGraph
-- | Parse an in-memory nodes dump with 'genParserTaxonomyGraph'.
--
-- @"parseTaxonomy"@ is the source name passed to 'parse'.
parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double)
parseTaxonomy = parse genParserTaxonomyGraph "parseTaxonomy"
-- | Parser for a nodes dump: one or more node records are collected and
-- assembled into a graph with one labelled node per record and one edge per
-- record, self-loops excluded.
genParserTaxonomyGraph :: GenParser Char st (Gr SimpleTaxon Double)
genParserTaxonomyGraph = do
  nodesEdges <- many1 (try genParserGraphNodeEdge)
  -- End of input is accepted but not demanded: input following the last
  -- well-formed record is left unconsumed rather than reported as an error.
  optional eof
  let (nodesList, edgesList) = unzip nodesEdges
      -- Edges whose source equals their target are dropped.
      taxedges = filter notLoopEdge edgesList
  return (mkGraph nodesList taxedges)
-- | 'True' for an edge whose two endpoints differ; the edge label is ignored.
notLoopEdge :: (Int, Int, a) -> Bool
notLoopEdge (source, target, _) = source /= target
-- | Like the plain taxonomy graph parser, but every node label gets its
-- scientific name filled in from the supplied name entries via
-- 'setNodeScientificName' before the graph is built.
genParserNamedTaxonomyGraph :: [TaxName] -> GenParser Char st (Gr SimpleTaxon Double)
genParserNamedTaxonomyGraph filteredNodeNames = do
  nodesEdges <- many1 (try genParserGraphNodeEdge)
  -- End of input is accepted but not demanded: input following the last
  -- well-formed record is left unconsumed rather than reported as an error.
  optional eof
  let (nodesList, edgesList) = unzip nodesEdges
      -- Edges whose source equals their target are dropped.
      taxedges = filter notLoopEdge edgesList
      taxnamednodes = map (setNodeScientificName filteredNodeNames) nodesList
  return (mkGraph taxnamednodes taxedges)
-- | Set the scientific name of a labelled node.
--
-- The first entry of @inputTaxNames@ whose taxonomy id matches the taxon is
-- used; when none matches, the name becomes @no name@. The first component of
-- the pair is passed through untouched.
--
-- The lookup is a linear 'find' over the list for each call.
setNodeScientificName :: [TaxName] -> (t, SimpleTaxon) -> (t, SimpleTaxon)
setNodeScientificName inputTaxNames (inputNode, inputTaxon) =
  (inputNode, inputTaxon {simpleScientificName = retrievedName})
  where
    matchingName = find (isTaxNameIdSimpleTaxid inputTaxon) inputTaxNames
    retrievedName = maybe (T.pack "no name") nameTxt matchingName
-- | 'True' when the name entry and the taxon share the same taxonomy id.
isTaxNameIdSimpleTaxid :: SimpleTaxon -> TaxName -> Bool
isTaxNameIdSimpleTaxid inputTaxon inputTaxName =
  nameTaxId inputTaxName == simpleTaxId inputTaxon
-- | Parse one record of a nodes dump into a labelled graph node and the edge
-- from that node to its parent.
--
-- Only the first three @\\t|\\t@-separated columns are interpreted: taxonomy
-- id, parent taxonomy id and rank. Everything else up to the newline is
-- skipped. The node label starts with an empty scientific name and every edge
-- gets the weight @1@.
genParserGraphNodeEdge :: GenParser Char st ((Int, SimpleTaxon), (Int, Int, Double))
genParserGraphNodeEdge = do
  taxIdDigits <- many1 digit
  _ <- string "\t|\t"
  parentTaxIdDigits <- many1 digit
  _ <- string "\t|\t"
  rankField <- many1 (noneOf "\t")
  -- The remaining columns are not needed for the graph; at least one
  -- character before the line break is required.
  _ <- many1 (noneOf "\n")
  _ <- char '\n'
  let taxId = readInt taxIdDigits
      parentTaxId = readInt parentTaxIdDigits
      taxon = SimpleTaxon taxId T.empty parentTaxId (readRank rankField)
  return ((taxId, taxon), (taxId, parentTaxId, 1 :: Double))
-- | Parse in-memory citation records (one or more) with
-- 'genParserNCBITaxCitations'; @"parseTaxCitations"@ is the source name
-- passed to 'parse'.
parseNCBITaxCitations :: String -> Either ParseError [TaxCitation]
parseNCBITaxCitations = parse genParserNCBITaxCitations "parseTaxCitations"
-- | Parse the citation records of the file at the given path.
--
-- Unlike the other record readers in this module, this one goes through
-- @parseFromFileEncISO88591@ instead of 'parseFromFile'
-- (NOTE(review): presumably to decode the file as ISO-8859-1 - confirm).
readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation])
readNCBITaxCitations = parseFromFileEncISO88591 genParserNCBITaxCitations
-- | Parse in-memory deleted-node records (one or more) with
-- 'genParserNCBITaxDelNodes'; @"parseTaxDelNodes"@ is the source name passed
-- to 'parse'.
parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode]
parseNCBITaxDelNodes = parse genParserNCBITaxDelNodes "parseTaxDelNodes"
-- | Parse the deleted-node records of the file at the given path using
-- 'parseFromFile'.
readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode])
readNCBITaxDelNodes = parseFromFile genParserNCBITaxDelNodes
-- | Parse in-memory division records (one or more) with
-- 'genParserNCBITaxDivisons'; @"parseTaxDivisons"@ is the source name passed
-- to 'parse'.
parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision]
parseNCBITaxDivisions = parse genParserNCBITaxDivisons "parseTaxDivisons"
-- | Parse the division records of the file at the given path using
-- 'parseFromFile'.
readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision])
readNCBITaxDivisions = parseFromFile genParserNCBITaxDivisons
-- | Parse in-memory genetic-code records (one or more) with
-- 'genParserNCBITaxGenCodes'; @"parseTaxGenCodes"@ is the source name passed
-- to 'parse'.
parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode]
parseNCBITaxGenCodes = parse genParserNCBITaxGenCodes "parseTaxGenCodes"
-- | Parse the genetic-code records of the file at the given path using
-- 'parseFromFile'.
readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode])
readNCBITaxGenCodes = parseFromFile genParserNCBITaxGenCodes
-- | Parse in-memory merged-node records (one or more) with
-- 'genParserNCBITaxMergedNodes'; @"parseTaxMergedNodes"@ is the source name
-- passed to 'parse'.
parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode]
parseNCBITaxMergedNodes = parse genParserNCBITaxMergedNodes "parseTaxMergedNodes"
-- | Parse the merged-node records of the file at the given path using
-- 'parseFromFile'.
readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode])
readNCBITaxMergedNodes = parseFromFile genParserNCBITaxMergedNodes
-- | Parse in-memory name records (one or more) with 'genParserNCBITaxNames';
-- @"parseTaxNames"@ is the source name passed to 'parse'.
parseNCBITaxNames :: String -> Either ParseError [TaxName]
parseNCBITaxNames = parse genParserNCBITaxNames "parseTaxNames"
-- | Parse the name records of the file at the given path using
-- 'parseFromFile'.
readNCBITaxNames :: String -> IO (Either ParseError [TaxName])
readNCBITaxNames = parseFromFile genParserNCBITaxNames
-- | Parse a single in-memory node record.
--
-- Despite the plural name this runs the one-record parser
-- 'genParserNCBITaxNode', so exactly one 'TaxNode' is returned and any input
-- after the first record is not examined. @"parseTaxNode"@ is the source name
-- passed to 'parse'.
parseNCBITaxNodes :: String -> Either ParseError TaxNode
parseNCBITaxNodes = parse genParserNCBITaxNode "parseTaxNode"
-- | Parse the node records of the file at the given path using
-- 'parseFromFile'.
readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode])
readNCBITaxNodes = parseFromFile genParserNCBITaxNodes
-- | Parse a single in-memory node record into a 'SimpleTaxon'.
--
-- Despite the plural name this runs the one-record parser
-- 'genParserNCBISimpleTaxon', so exactly one value is returned and any input
-- after the first record is not examined. @"parseSimpleTaxon"@ is the source
-- name passed to 'parse'.
parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon
parseNCBISimpleTaxons = parse genParserNCBISimpleTaxon "parseSimpleTaxon"
-- | Parse the records of the file at the given path into 'SimpleTaxon'
-- values using 'parseFromFile'.
readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon])
readNCBISimpleTaxons = parseFromFile genParserNCBISimpleTaxons
-- | Read all dump files of an NCBI taxonomy database from one folder.
--
-- The files @citations.dmp@, @delnodes.dmp@, @division.dmp@, @gencode.dmp@,
-- @merged.dmp@, @names.dmp@ and @nodes.dmp@ are read in that order. Each
-- result is passed through @extractParseError@, and the collected strings
-- together with the raw results are handed to @checkParsing@, which decides
-- between the error list and the assembled 'NCBITaxDump'.
--
-- The file names are appended to @folder@ with plain '++', so the path has to
-- end with a path separator.
readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump)
readNCBITaxonomyDatabase folder = do
  citations <- readNCBITaxCitations (folder ++ "citations.dmp")
  taxdelNodes <- readNCBITaxDelNodes (folder ++ "delnodes.dmp")
  divisions <- readNCBITaxDivisions (folder ++ "division.dmp")
  genCodes <- readNCBITaxGenCodes (folder ++ "gencode.dmp")
  mergedNodes <- readNCBITaxMergedNodes (folder ++ "merged.dmp")
  names <- readNCBITaxNames (folder ++ "names.dmp")
  taxnodes <- readNCBITaxNodes (folder ++ "nodes.dmp")
  -- One entry per file, in the same order as the files were read.
  let parseErrors =
        [ extractParseError citations
        , extractParseError taxdelNodes
        , extractParseError divisions
        , extractParseError genCodes
        , extractParseError mergedNodes
        , extractParseError names
        , extractParseError taxnodes
        ]
  return
    ( checkParsing
        parseErrors
        citations
        taxdelNodes
        divisions
        genCodes
        mergedNodes
        names
        taxnodes
    )
-- | One or more citation records.
genParserNCBITaxCitations :: GenParser Char st [TaxCitation]
genParserNCBITaxCitations = many1 genParserNCBITaxCitation
-- | One or more deleted-node records.
genParserNCBITaxDelNodes :: GenParser Char st [TaxDelNode]
genParserNCBITaxDelNodes = many1 genParserNCBITaxDelNode
-- | One or more division records.
genParserNCBITaxDivisons :: GenParser Char st [TaxDivision]
genParserNCBITaxDivisons = many1 genParserNCBITaxDivision
-- | One or more genetic-code records.
genParserNCBITaxGenCodes :: GenParser Char st [TaxGenCode]
genParserNCBITaxGenCodes = many1 genParserNCBITaxGenCode
-- | One or more merged-node records.
genParserNCBITaxMergedNodes :: GenParser Char st [TaxMergedNode]
genParserNCBITaxMergedNodes = many1 genParserNCBITaxMergedNode
-- | One or more name records.
genParserNCBITaxNames :: GenParser Char st [TaxName]
genParserNCBITaxNames = many1 genParserNCBITaxName
-- | One or more node records.
genParserNCBITaxNodes :: GenParser Char st [TaxNode]
genParserNCBITaxNodes = many1 genParserNCBITaxNode
-- | One or more node records, each parsed into a 'SimpleTaxon'.
genParserNCBISimpleTaxons :: GenParser Char st [SimpleTaxon]
genParserNCBISimpleTaxons = many1 genParserNCBISimpleTaxon
-- | Parse one citation record.
--
-- Columns, in order: citation id (digits), citation key (may be empty),
-- optional PubMed id (digits), optional MEDLINE id (digits), URL (delegated
-- to @genParserTaxURL@), free text (may be empty) and a list of taxonomy ids
-- (zero or more runs of @genParserTaxIdList@). The record ends with
-- @\\t|\\n@.
genParserNCBITaxCitation :: GenParser Char st TaxCitation
genParserNCBITaxCitation = do
  citIdDigits <- many1 digit
  _ <- string "\t|\t"
  citKey <- many (noneOf "\t")
  _ <- string "\t|\t"
  pubmedIdDigits <- optionMaybe (many1 digit)
  _ <- string "\t|\t"
  medlineIdDigits <- optionMaybe (many1 digit)
  -- The URL column is delimited by bare '|' characters: the separator before
  -- it is consumed as tab + '|' and the one after it as '|' + tab.
  _ <- tab
  _ <- char '|'
  url <- genParserTaxURL
  _ <- char '|'
  _ <- tab
  citText <- many (noneOf "\t")
  _ <- string "\t|\t"
  taxIds <- many genParserTaxIdList
  _ <- string "\t|\n"
  return $
    TaxCitation
      (readInt citIdDigits)
      (B.pack citKey)
      (fmap readInt pubmedIdDigits)
      (fmap readInt medlineIdDigits)
      url
      (B.pack citText)
      taxIds
-- | Parse one deleted-node record: a run of digits, a single whitespace
-- character, @|@ and a newline.
genParserNCBITaxDelNode :: GenParser Char st TaxDelNode
genParserNCBITaxDelNode = do
  taxIdDigits <- many1 digit
  _ <- space
  _ <- char '|'
  _ <- char '\n'
  return (TaxDelNode (readInt taxIdDigits))
-- | Parse one record of the division dump.
--
-- Fields are separated by @\\t|\\t@ and the record ends with @\\t|\\n@.
-- In order: numeric division id, upper-case division code, division name
-- and comments. The id, code and name must be non-empty; the comments
-- field may be empty.
genParserNCBITaxDivision :: GenParser Char st TaxDivision
genParserNCBITaxDivision = do
  _divisionId <- many1 digit
  _ <- string "\t|\t"
  _divisionCDE <- many1 upper
  _ <- string "\t|\t"
  _divisionName <- many1 (noneOf "\t")
  _ <- string "\t|\t"
  -- 'many' instead of 'many1': a record without a comment must still parse,
  -- the same way 'genParserNCBITaxNode' treats its comments field.
  -- NOTE(review): NCBI division dumps are understood to leave this field
  -- empty for most rows - confirm against the taxdump readme.
  _comments <- many (noneOf "\t")
  _ <- string "\t|\n"
  return $
    TaxDivision
      (readInt _divisionId)
      (B.pack _divisionCDE)
      (B.pack _divisionName)
      (B.pack _comments)
-- | Parse one record of the genetic-code dump.
--
-- Fields are separated by @\\t|\\t@ and the record ends with @\\t|\\n@.
-- In order: numeric genetic code id, abbreviation, name, translation table
-- and start codons. The id and name must be non-empty; the other three
-- fields may be empty.
genParserNCBITaxGenCode :: GenParser Char st TaxGenCode
genParserNCBITaxGenCode = do
  _geneticCodeId <- many1 digit
  _ <- string "\t|\t"
  -- Optional fields use 'many' so that an empty column does not abort the
  -- whole parse, in line with the optional columns of the node and name
  -- parsers in this module.
  -- NOTE(review): the abbreviation (and, for the unspecified code, the
  -- translation table and start codons) is understood to be empty in NCBI
  -- gencode dumps - confirm against the taxdump readme.
  _abbreviation <- many (noneOf "\t")
  _ <- string "\t|\t"
  _genCodeName <- many1 (noneOf "\t")
  _ <- string "\t|\t"
  _cde <- many (noneOf "\t")
  _ <- string "\t|\t"
  _starts <- many (noneOf "\t")
  _ <- string "\t|\n"
  return $
    TaxGenCode
      (readInt _geneticCodeId)
      (B.pack _abbreviation)
      (B.pack _genCodeName)
      (B.pack _cde)
      (B.pack _starts)
-- | Parse one record of the merged-nodes dump: the old taxonomy id, the
-- @\\t|\\t@ separator, the new taxonomy id and the @\\t|\\n@ terminator.
genParserNCBITaxMergedNode :: GenParser Char st TaxMergedNode
genParserNCBITaxMergedNode =
  TaxMergedNode <$> taxIdBefore "\t|\t" <*> taxIdBefore "\t|\n"
  where
    -- A non-empty run of digits followed by the given literal delimiter.
    taxIdBefore delimiter = readInt <$> many1 digit <* string delimiter
-- | Parse one record of the names dump.
--
-- In order: numeric taxonomy id, name text, unique name (may be empty) and
-- name class, separated by @\\t|\\t@ and terminated by a tab, a pipe and a
-- newline. No text field may contain a tab or a line feed. The resulting
-- 'TaxName' is forced to weak head normal form before it is returned.
genParserNCBITaxName :: GenParser Char st TaxName
genParserNCBITaxName = do
  idDigits <- many1 digit <* fieldSep
  nameText <- many1 fieldChar <* fieldSep
  uniqueText <- many fieldChar <* fieldSep
  classText <- many1 fieldChar
  _ <- tab *> char '|' *> newline
  return $!
    TaxName
      (readInt idDigits)
      (T.pack nameText)
      (B.pack uniqueText)
      (B.pack classText)
  where
    fieldSep = string "\t|\t"
    fieldChar = noneOf "\t\n"
-- | Parse the leading columns of a nodes-dump record into a 'SimpleTaxon'.
--
-- Only the taxonomy id, the parent taxonomy id and the rank are read; the
-- rest of the line (which must be non-empty) is consumed and discarded up
-- to and including the line feed. The name is set to empty text. The
-- resulting value is forced to weak head normal form before it is returned.
genParserNCBISimpleTaxon :: GenParser Char st SimpleTaxon
genParserNCBISimpleTaxon = do
  idDigits <- many1 digit <* fieldSep
  parentDigits <- many1 digit <* fieldSep
  rankText <- many1 (noneOf "\t")
  -- Skip every remaining column of the record.
  _ <- many1 (noneOf "\n") *> char '\n'
  return $!
    SimpleTaxon
      (readInt idDigits)
      T.empty
      (readInt parentDigits)
      (readRank rankText)
  where
    fieldSep = string "\t|\t"
-- | Parse one complete record of the nodes dump.
--
-- Columns are separated by @\\t|\\t@; the record ends with a tab, a pipe
-- and a line feed. In order: taxonomy id, parent taxonomy id, rank, EMBL
-- code (may be empty), division id, inherited-division flag, genetic code
-- id, inherited-genetic-code flag, mitochondrial genetic code id,
-- inherited-mitochondrial-genetic-code flag, GenBank-hidden flag,
-- hidden-subtree-root flag and comments (may be empty).
genParserNCBITaxNode :: GenParser Char st TaxNode
genParserNCBITaxNode = do
  taxIdTxt <- numberField
  parentTxt <- numberField
  rankTxt <- many1 nonTab <* fieldSep
  emblTxt <- many nonTab <* fieldSep
  divisionTxt <- numberField
  inheritedDivTxt <- numberField
  genCodeTxt <- numberField
  inheritedGCTxt <- numberField
  mitoGenCodeTxt <- numberField
  inheritedMGCTxt <- numberField
  genBankHiddenTxt <- numberField
  hiddenSubtreeTxt <- numberField
  commentTxt <- many nonTab
  _ <- tab *> char '|' *> char '\n'
  return $
    TaxNode
      (readInt taxIdTxt)
      (readInt parentTxt)
      (readRank rankTxt)
      (B.pack emblTxt)
      (readInt divisionTxt)
      (readBool inheritedDivTxt)
      (readInt genCodeTxt)
      (readBool inheritedGCTxt)
      (readInt mitoGenCodeTxt)
      (readBool inheritedMGCTxt)
      (readBool genBankHiddenTxt)
      (readBool hiddenSubtreeTxt)
      (B.pack commentTxt)
  where
    fieldSep = string "\t|\t"
    nonTab = noneOf "\t"
    -- A non-empty run of digits together with the separator that follows it.
    numberField = many1 digit <* fieldSep
-- | Convert a textual number to an 'Int' using 'read'.
--
-- Partial: input that 'read' cannot interpret as an 'Int' raises a runtime
-- error, so callers are expected to pass already validated digit strings.
readInt :: String -> Int
readInt digits = read digits
-- | Interpret a flag column: exactly @"1"@ is 'True'; @"0"@ and every other
-- string are 'False'.
readBool :: String -> Bool
readBool = (== "1")
-- | Convert the textual rank column to a 'Rank' through the type's 'Read'
-- instance.
--
-- Partial: text that the instance does not accept raises a runtime error.
readRank :: String -> Rank
readRank = read
-- | Parse a single taxonomy id out of a space separated list: a non-empty
-- run of digits with at most one space before and at most one space after.
genParserTaxIdList :: GenParser Char st Int
genParserTaxIdList =
  readInt <$> (padding *> many1 digit <* padding)
  where
    padding = optional (char ' ')
-- | Parse a URL that is split over two tab-introduced segments.
--
-- The first segment runs from after a tab up to the next tab; the second
-- runs from after that tab up to (not including) the next pipe character.
-- Both segments are concatenated and packed into a 'B.ByteString'.
genParserTaxURL :: GenParser Char st B.ByteString
genParserTaxURL = do
  leading <- tab *> many (noneOf "\t")
  trailing <- tab *> many (noneOf "|")
  return (B.pack (leading ++ trailing))
-- | Join two optional URL fragments.
--
-- Both present: their concatenation. Only the first present: the first
-- unchanged. First missing (whatever the second is): 'Nothing'.
concatenateURLParts :: Maybe String -> Maybe String -> Maybe String
concatenateURLParts (Just front) (Just back) = Just (front ++ back)
concatenateURLParts front@(Just _) Nothing = front
concatenateURLParts Nothing _ = Nothing
-- | Concatenate two optional strings; the result is 'Nothing' as soon as
-- either argument is 'Nothing'.
maybeStringConcat :: Maybe String -> Maybe String -> Maybe String
maybeStringConcat front back = (++) <$> front <*> back
-- | Read a file's contents as a 'String', decoding it with the given
-- 'TextEncoding'.
--
-- The file is opened in 'ReadMode' and read with 'hGetContents', i.e. the
-- contents are produced lazily and the handle is not closed explicitly
-- here.
readEncodedFile :: TextEncoding -> FilePath -> IO String
readEncodedFile encoding name =
  openFile name ReadMode >>= \source ->
    hSetEncoding source encoding >> hGetContents source
-- | Run a parser over the contents of a file decoded as Latin-1.
--
-- The file name doubles as the source name reported in parse errors; the
-- parser is run with unit user state.
parseFromFileEncISO88591 :: Parser a -> String -> IO (Either ParseError a)
parseFromFileEncISO88591 parser fname =
  runP parser () fname <$> readEncodedFile latin1 fname
-- | Combine the individual parse results into an 'NCBITaxDump'.
--
-- The decision is based solely on the first argument: when every collected
-- error message is empty the dump is assembled, otherwise the list of
-- messages is returned unchanged as 'Left'. The parse results themselves
-- are unwrapped with 'E.fromRight' without being inspected, so the error
-- list is presumably expected to be derived from those same results -
-- verify against the caller.
checkParsing
  :: [String]
  -> Either ParseError [TaxCitation]
  -> Either ParseError [TaxDelNode]
  -> Either ParseError [TaxDivision]
  -> Either ParseError [TaxGenCode]
  -> Either ParseError [TaxMergedNode]
  -> Either ParseError [TaxName]
  -> Either ParseError [TaxNode]
  -> Either [String] NCBITaxDump
checkParsing parseErrors citations taxdelNodes divisions genCodes mergedNodes names taxnodes =
  if all null parseErrors
    then Right assembledDump
    else Left parseErrors
  where
    assembledDump =
      NCBITaxDump
        (E.fromRight citations)
        (E.fromRight taxdelNodes)
        (E.fromRight divisions)
        (E.fromRight genCodes)
        (E.fromRight mergedNodes)
        (E.fromRight names)
        (E.fromRight taxnodes)
-- | Render the error of a failed parse with 'show'; a successful parse
-- yields the empty string.
extractParseError :: Either ParseError a -> String
extractParseError = either show (const "")