{-# LANGUAGE CPP #-}
module Network.HTTP.Directory
( httpDirectory,
httpDirectory',
httpRawDirectory,
httpExists,
httpFileSize,
httpLastModified,
httpManager,
httpRedirect,
httpRedirect',
httpRedirects,
isHttpUrl,
trailingSlash,
noTrailingSlash,
Manager
) where
#if (defined(MIN_VERSION_base) && MIN_VERSION_base(4,8,0))
#else
import Control.Applicative ((<$>))
#endif
import Control.Monad (when)
import qualified Data.ByteString.Char8 as B
import qualified Data.List as L
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Clock (UTCTime)
import Network.HTTP.Client (hrRedirects, httpLbs, httpNoBody, Manager, method,
newManager, parseRequest, Request,
Response, responseBody, responseHeaders,
responseOpenHistory, responseStatus)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.HTTP.Date (httpDateToUTC, parseHTTPDate)
import Network.HTTP.Types (hContentLength, hLocation, methodHead, statusCode)
import Network.URI (parseURI, URI(..))
import Text.HTML.DOM (parseLBS)
import Text.XML.Cursor
httpDirectory :: Manager -> String -> IO [Text]
httpDirectory mgr url = do
hrefs <- httpRawDirectory mgr url
return $ defaultFilesFilter uri hrefs
where
uri = parseURI url
defaultFilesFilter :: Maybe URI -> [Text] -> [Text]
defaultFilesFilter mUri =
L.nub . filter (not . or . flist (map T.isInfixOf [":", "?", "/"] ++ [(`elem` ["../", "..", "#"])])) . map removePath
where
flist :: [a->b] -> a -> [b]
flist fs a = map ($ a) fs
removePath :: Text -> Text
removePath t =
case mpath of
Nothing -> t
Just path ->
fromMaybe t $ T.stripPrefix (T.pack path) t
mpath = uriPath <$> mUri
httpDirectory' :: String -> IO [Text]
httpDirectory' url = do
mgr <- httpManager
httpDirectory mgr url
httpRawDirectory :: Manager -> String -> IO [Text]
httpRawDirectory mgr url = do
request <- parseRequest url
response <- httpLbs request mgr
checkResponse url response
let body = responseBody response
doc = parseLBS body
cursor = fromDocument doc
return $ concatMap (attribute "href") $ cursor $// element "a"
httpExists :: Manager -> String -> IO Bool
httpExists mgr url = do
response <- httpHead mgr url
return $ statusCode (responseStatus response) == 200
httpFileSize :: Manager -> String -> IO (Maybe Integer)
httpFileSize mgr url = do
response <- httpHead mgr url
checkResponse url response
let headers = responseHeaders response
return $ read . B.unpack <$> lookup hContentLength headers
httpLastModified :: Manager -> String -> IO (Maybe UTCTime)
httpLastModified mgr url = do
response <- httpHead mgr url
checkResponse url response
let headers = responseHeaders response
mdate = lookup "Last-Modified" headers
return $ httpDateToUTC <$> (parseHTTPDate =<< mdate)
checkResponse :: String -> Response r -> IO ()
checkResponse url response =
when (statusCode (responseStatus response) /= 200) $ do
putStrLn url
error' $ show $ responseStatus response
httpManager :: IO Manager
httpManager =
newManager tlsManagerSettings
httpRedirects :: Manager -> String -> IO [B.ByteString]
httpRedirects mgr url = do
request <- parseRequestHead url
respHist <- responseOpenHistory request mgr
return $ reverse $ mapMaybe (lookup hLocation . responseHeaders . snd) $ hrRedirects respHist
httpRedirect :: Manager -> String -> IO (Maybe B.ByteString)
httpRedirect mgr url =
listToMaybe <$> httpRedirects mgr url
httpRedirect' :: String -> IO (Maybe B.ByteString)
httpRedirect' url = do
mgr <- httpManager
listToMaybe <$> httpRedirects mgr url
parseRequestHead :: String -> IO Request
parseRequestHead url = do
request <- parseRequest url
return $ request {method = methodHead}
httpHead :: Manager -> String -> IO (Response ())
httpHead mgr url = do
request <- parseRequestHead url
httpNoBody request mgr
isHttpUrl :: String -> Bool
isHttpUrl loc = "http:" `L.isPrefixOf` loc || "https:" `L.isPrefixOf` loc
trailingSlash :: String -> String
trailingSlash "" = ""
trailingSlash str =
if last str == '/' then str else str ++ "/"
noTrailingSlash :: Text -> Text
noTrailingSlash = T.dropWhileEnd (== '/')
error' :: String -> a
#if (defined(MIN_VERSION_base) && MIN_VERSION_base(4,9,0))
error' = errorWithoutStackTrace
#else
error' = error
#endif