{-# LANGUAGE CPP #-}
module Network.HTTP.Directory
( httpDirectory,
httpDirectory',
httpRawDirectory,
httpExists,
httpFileSize,
httpLastModified,
httpManager,
httpRedirect,
httpRedirect',
httpRedirects,
isHttpUrl,
Manager
) where
#if (defined(MIN_VERSION_base) && MIN_VERSION_base(4,8,0))
#else
import Control.Applicative ((<$>))
#endif
import Control.Monad (when)
import qualified Data.ByteString.Char8 as B
import qualified Data.List as L
import Data.Maybe
import Data.Text (Text, isPrefixOf, isInfixOf)
import Data.Time.Clock (UTCTime)
import Network.HTTP.Client (hrRedirects, httpLbs, httpNoBody, Manager, method,
newManager, parseRequest,
Request, Response, responseBody, responseHeaders,
responseOpenHistory, responseStatus)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.HTTP.Date (httpDateToUTC, parseHTTPDate)
import Network.HTTP.Types (hContentLength, hLocation, methodHead, statusCode)
import Text.HTML.DOM (parseLBS)
import Text.XML.Cursor
httpDirectory :: Manager -> String -> IO [Text]
httpDirectory mgr url = do
hrefs <- httpRawDirectory mgr url
return $ L.nub $ filter (not . or . flist (map isPrefixOf ["/","?"] ++ [(`elem` ["../", "..", "#"]), (":" `isInfixOf`)])) hrefs
flist :: [a->b] -> a -> [b]
flist fs a = map ($ a) fs
httpDirectory' :: String -> IO [Text]
httpDirectory' url = do
mgr <- httpManager
httpDirectory mgr url
httpRawDirectory :: Manager -> String -> IO [Text]
httpRawDirectory mgr url = do
request <- parseRequest url
response <- httpLbs request mgr
checkResponse url response
let body = responseBody response
doc = parseLBS body
cursor = fromDocument doc
return $ concatMap (attribute "href") $ cursor $// element "a"
httpExists :: Manager -> String -> IO Bool
httpExists mgr url = do
response <- httpHead mgr url
return $ statusCode (responseStatus response) == 200
httpFileSize :: Manager -> String -> IO (Maybe Integer)
httpFileSize mgr url = do
response <- httpHead mgr url
checkResponse url response
let headers = responseHeaders response
return $ read . B.unpack <$> lookup hContentLength headers
httpLastModified :: Manager -> String -> IO (Maybe UTCTime)
httpLastModified mgr url = do
response <- httpHead mgr url
checkResponse url response
let headers = responseHeaders response
mdate = lookup "Last-Modified" headers
return $ httpDateToUTC <$> maybe Nothing parseHTTPDate mdate
checkResponse :: String -> Response r -> IO ()
checkResponse url response =
when (statusCode (responseStatus response) /= 200) $ do
putStrLn url
error $ show $ responseStatus response
httpManager :: IO Manager
httpManager =
newManager tlsManagerSettings
httpRedirects :: Manager -> String -> IO [B.ByteString]
httpRedirects mgr url = do
request <- parseRequestHead url
respHist <- responseOpenHistory request mgr
return $ reverse $ mapMaybe (lookup hLocation . responseHeaders . snd) $ hrRedirects respHist
httpRedirect :: Manager -> String -> IO (Maybe B.ByteString)
httpRedirect mgr url =
listToMaybe <$> httpRedirects mgr url
httpRedirect' :: String -> IO (Maybe B.ByteString)
httpRedirect' url = do
mgr <- httpManager
listToMaybe <$> httpRedirects mgr url
parseRequestHead :: String -> IO Request
parseRequestHead url = do
request <- parseRequest url
return $ request {method = methodHead}
httpHead :: Manager -> String -> IO (Response ())
httpHead mgr url = do
request <- parseRequestHead url
httpNoBody request mgr
isHttpUrl :: String -> Bool
isHttpUrl loc = "http:" `L.isPrefixOf` loc || "https:" `L.isPrefixOf` loc