module Bio.BlastHTTP ( BlastHTTPQuery (..),
blastHTTP) where
import Network.HTTP.Conduit
import Data.Conduit
import qualified Data.ByteString.Lazy.Char8 as L8
import Control.Monad.IO.Class (liftIO)
import qualified Control.Monad as CM
import Bio.BlastXML
import Text.XML.HXT.Core
import Network
import qualified Data.Conduit.List as CL
import Data.List
import Control.Monad.Error as CM
import Control.Concurrent
import Data.Maybe
import Data.Either
import Bio.Core.Sequence
data BlastHTTPQuery = BlastHTTPQuery
{ program :: Maybe String
, database :: Maybe String
, querySequence :: Maybe SeqData
, optionalArguments :: Maybe String
}
deriving (Show, Eq)
parseHTML :: String -> IOStateArrow s0 b0 XmlTree
parseHTML = readString [withParseHTML yes, withWarnings no]
atName :: ArrowXml a => String -> a XmlTree XmlTree
atName elementId = deep (isElem >>> hasAttrValue "name" (== elementId))
atId :: ArrowXml a => String -> a XmlTree XmlTree
atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId))
startSession :: String -> String -> String -> Maybe String -> IO String
startSession program database querySequence optionalArguments = do
requestXml <- withSocketsDo
$ sendQuery program database querySequence optionalArguments
let requestXMLString = L8.unpack requestXml
CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value")
sendQuery :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQuery program database querySequence optionalArguments
| isJust optionalArguments = simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program ++ "&DATABASE=" ++ database ++ fromJust optionalArguments ++ "&QUERY=" ++ querySequence)
| otherwise = simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program ++ "&DATABASE=" ++ database ++ "&QUERY=" ++ querySequence)
retrieveSessionStatus :: String -> IO String
retrieveSessionStatus rid = do
statusXml <- withSocketsDo
$ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid)
let statusXMLString = L8.unpack statusXml
return statusXMLString
retrieveResult :: String -> IO (Either String BlastResult)
retrieveResult rid = do
statusXml <- withSocketsDo
$ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get")
resultXML <- parseXML statusXml
return (Right resultXML)
checkSessionStatus :: String -> IO (Either String BlastResult)
checkSessionStatus rid = do
threadDelay 60000000
status <- retrieveSessionStatus rid
waitOrRetrieve status rid
waitOrRetrieve :: String -> String -> IO (Either String BlastResult)
waitOrRetrieve status rid
| "Status=READY" `isInfixOf` status = retrieveResult rid
| "Status=FAILURE" `isInfixOf` status = do
let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n"
return (Left exceptionMessage)
| "Status=UNKNOWN" `isInfixOf` status = do
let exceptionMessage = "Search $rid expired.\n"
return (Left exceptionMessage)
| otherwise = checkSessionStatus rid
performQuery :: String -> String -> Maybe SeqData -> Maybe String -> IO (Either String BlastResult)
performQuery program database querySequenceMaybe optionalArgumentMaybe
| isJust querySequenceMaybe = do
rid <- startSession program database (L8.unpack (unSD (fromJust querySequenceMaybe))) optionalArgumentMaybe
checkSessionStatus rid
| otherwise = do
let exceptionMessage = "Error - no query sequence provided"
return (Left exceptionMessage)
blastHTTP :: BlastHTTPQuery -> IO (Either String BlastResult)
blastHTTP (BlastHTTPQuery program database querySequence optionalArguments) = do
let defaultProgram = "blastn"
let defaultDatabase = "refseq_genomic"
let selectedProgram = fromMaybe defaultProgram program
let selectedDatabase = fromMaybe defaultDatabase database
performQuery selectedProgram selectedDatabase querySequence optionalArguments