{-# LANGUAGE CPP #-}
module Network.HTTP.Directory
( httpDirectory,
httpDirectory',
httpRawDirectory,
httpExists,
httpFileSize,
httpLastModified,
httpManager,
httpRedirect,
httpRedirect',
httpRedirects
) where
#if (defined(MIN_VERSION_base) && MIN_VERSION_base(4,8,0))
#else
import Control.Applicative ((<$>))
#endif
import qualified Data.ByteString.Char8 as B
import Data.List (nub)
import Data.Maybe
import Data.Text (Text, isPrefixOf, pack)
import Data.Time.Clock (UTCTime)
import Network.HTTP.Client (hrRedirects, httpLbs, httpNoBody, Manager, method,
newManager, parseRequest,
Request, Response, responseBody, responseHeaders,
responseOpenHistory, responseStatus)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.HTTP.Date (httpDateToUTC, parseHTTPDate)
import Network.HTTP.Types (hContentLength, hLocation, methodHead, statusCode)
import Text.HTML.DOM (parseLBS)
import Text.XML.Cursor
httpDirectory :: Manager -> String -> IO [Text]
httpDirectory mgr url = do
hrefs <- httpRawDirectory mgr url
return $ nub $ filter (not . or . flist [isHttp, ("/" `isPrefixOf`), (pack "../" ==), ("?" `isPrefixOf`)]) hrefs
where
isHttp loc = "http:" `isPrefixOf` loc || "https:" `isPrefixOf` loc
flist :: [a->b] -> a -> [b]
flist fs a = map ($ a) fs
httpDirectory' :: String -> IO [Text]
httpDirectory' url = do
mgr <- httpManager
httpDirectory mgr url
httpRawDirectory :: Manager -> String -> IO [Text]
httpRawDirectory mgr url = do
request <- parseRequest url
response <- httpLbs request mgr
if statusCode (responseStatus response) /= 200
then do
putStrLn url
error $ show $ responseStatus response
else do
let body = responseBody response
doc = parseLBS body
cursor = fromDocument doc
return $ concatMap (attribute "href") $ cursor $// element "a"
httpExists :: Manager -> String -> IO Bool
httpExists mgr url = do
response <- httpHead mgr url
return $ statusCode (responseStatus response) == 200
httpFileSize :: Manager -> String -> IO (Maybe Integer)
httpFileSize mgr url = do
response <- httpHead mgr url
if statusCode (responseStatus response) /= 200
then do
putStrLn url
error $ show $ responseStatus response
else do
let headers = responseHeaders response
return $ read . B.unpack <$> lookup hContentLength headers
httpLastModified :: Manager -> String -> IO (Maybe UTCTime)
httpLastModified mgr url = do
response <- httpHead mgr url
if statusCode (responseStatus response) /= 200
then do
putStrLn url
error $ show $ responseStatus response
else do
let headers = responseHeaders response
mdate = lookup "Last-Modified" headers
return $ httpDateToUTC <$> maybe Nothing parseHTTPDate mdate
httpManager :: IO Manager
httpManager =
newManager tlsManagerSettings
httpRedirects :: Manager -> String -> IO [B.ByteString]
httpRedirects mgr url = do
request <- parseRequestHead url
respHist <- responseOpenHistory request mgr
return $ reverse $ mapMaybe (lookup hLocation . responseHeaders . snd) $ hrRedirects respHist
httpRedirect :: Manager -> String -> IO (Maybe B.ByteString)
httpRedirect mgr url =
listToMaybe <$> httpRedirects mgr url
httpRedirect' :: String -> IO (Maybe B.ByteString)
httpRedirect' url = do
mgr <- httpManager
listToMaybe <$> httpRedirects mgr url
parseRequestHead :: String -> IO Request
parseRequestHead url = do
request <- parseRequest url
return $ request {method = methodHead}
httpHead :: Manager -> String -> IO (Response ())
httpHead mgr url = do
request <- parseRequestHead url
httpNoBody request mgr