{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Hakyll.Check
( Check (..)
, check
) where
import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar,
readMVar)
import Control.Exception (SomeAsyncException (..),
SomeException (..), throw, try)
import Control.Monad (foldM, forM_)
import Control.Monad.Reader (ReaderT, ask, runReaderT)
import Control.Monad.State (StateT, get, modify, runStateT)
import Control.Monad.Trans (liftIO)
import Control.Monad.Trans.Resource (runResourceT)
import Data.List (isPrefixOf)
import qualified Data.Map.Lazy as Map
#if MIN_VERSION_base(4,9,0)
import Data.Semigroup (Semigroup (..))
#endif
import Network.URI (unEscapeString)
import System.Directory (doesDirectoryExist,
doesFileExist)
import System.Exit (ExitCode (..))
import System.FilePath (takeDirectory, takeExtension,
(</>))
import qualified Text.HTML.TagSoup as TS
#ifdef CHECK_EXTERNAL
import Data.List (intercalate)
import Data.Typeable (cast)
import Data.Version (versionBranch)
import GHC.Exts (fromString)
import qualified Network.HTTP.Conduit as Http
import qualified Network.HTTP.Types as Http
import qualified Paths_hakyll as Paths_hakyll
#endif
import Hakyll.Core.Configuration
import Hakyll.Core.Logger (Logger)
import qualified Hakyll.Core.Logger as Logger
import Hakyll.Core.Util.File
import Hakyll.Web.Html
data Check = All | InternalLinks
deriving (Eq, Ord, Show)
check :: Configuration -> Logger -> Check -> IO ExitCode
check config logger check' = do
((), state) <- runChecker checkDestination config logger check'
failed <- countFailedLinks state
return $ if failed > 0 then ExitFailure 1 else ExitSuccess
countFailedLinks :: CheckerState -> IO Int
countFailedLinks state = foldM addIfFailure 0 (Map.elems state)
where addIfFailure failures mvar = do
checkerWrite <- readMVar mvar
return $ failures + checkerFaulty checkerWrite
data CheckerRead = CheckerRead
{ checkerConfig :: Configuration
, checkerLogger :: Logger
, checkerCheck :: Check
}
data CheckerWrite = CheckerWrite
{ checkerFaulty :: Int
, checkerOk :: Int
} deriving (Show)
#if MIN_VERSION_base(4,9,0)
instance Semigroup CheckerWrite where
(<>) (CheckerWrite f1 o1) (CheckerWrite f2 o2) =
CheckerWrite (f1 + f2) (o1 + o2)
instance Monoid CheckerWrite where
mempty = CheckerWrite 0 0
mappend = (<>)
#else
instance Monoid CheckerWrite where
mempty = CheckerWrite 0 0
mappend (CheckerWrite f1 o1) (CheckerWrite f2 o2) =
CheckerWrite (f1 + f2) (o1 + o2)
#endif
type CheckerState = Map.Map URL (MVar CheckerWrite)
type Checker a = ReaderT CheckerRead (StateT CheckerState IO) a
type URL = String
runChecker :: Checker a -> Configuration -> Logger -> Check
-> IO (a, CheckerState)
runChecker checker config logger check' = do
let read' = CheckerRead
{ checkerConfig = config
, checkerLogger = logger
, checkerCheck = check'
}
Logger.flush logger
runStateT (runReaderT checker read') Map.empty
checkDestination :: Checker ()
checkDestination = do
config <- checkerConfig <$> ask
files <- liftIO $ getRecursiveContents
(const $ return False) (destinationDirectory config)
let htmls =
[ destinationDirectory config </> file
| file <- files
, takeExtension file == ".html"
]
forM_ htmls checkFile
checkFile :: FilePath -> Checker ()
checkFile filePath = do
logger <- checkerLogger <$> ask
contents <- liftIO $ readFile filePath
Logger.header logger $ "Checking file " ++ filePath
let urls = getUrls $ TS.parseTags contents
forM_ urls $ \url -> do
Logger.debug logger $ "Checking link " ++ url
m <- liftIO newEmptyMVar
checkUrlIfNeeded filePath (canonicalizeUrl url) m
where
canonicalizeUrl url = if schemeRelative url then "http:" ++ url else url
schemeRelative = isPrefixOf "//"
checkUrlIfNeeded :: FilePath -> URL -> MVar CheckerWrite -> Checker ()
checkUrlIfNeeded filepath url m = do
logger <- checkerLogger <$> ask
needsCheck <- (== All) . checkerCheck <$> ask
checked <- (url `Map.member`) <$> get
if not needsCheck || checked
then Logger.debug logger "Already checked, skipping"
else do modify $ Map.insert url m
checkUrl filepath url
checkUrl :: FilePath -> URL -> Checker ()
checkUrl filePath url
| isExternal url = checkExternalUrl url
| hasProtocol url = skip url $ Just "Unknown protocol, skipping"
| otherwise = checkInternalUrl filePath url
where
validProtoChars = ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "+-."
hasProtocol str = case break (== ':') str of
(proto, ':' : _) -> all (`elem` validProtoChars) proto
_ -> False
ok :: URL -> Checker ()
ok url = putCheckResult url mempty {checkerOk = 1}
skip :: URL -> Maybe String -> Checker ()
skip url maybeReason = do
logger <- checkerLogger <$> ask
case maybeReason of
Nothing -> return ()
Just reason -> Logger.debug logger reason
putCheckResult url mempty {checkerOk = 1}
faulty :: URL -> Maybe String -> Checker ()
faulty url reason = do
logger <- checkerLogger <$> ask
Logger.error logger $ "Broken link to " ++ show url ++ explanation
putCheckResult url mempty {checkerFaulty = 1}
where
formatExplanation = (" (" ++) . (++ ")")
explanation = maybe "" formatExplanation reason
putCheckResult :: URL -> CheckerWrite -> Checker ()
putCheckResult url result = do
state <- get
let maybeMVar = Map.lookup url state
case maybeMVar of
Just m -> liftIO $ putMVar m result
Nothing -> do
logger <- checkerLogger <$> ask
Logger.debug logger "Failed to find existing entry for checked URL"
checkInternalUrl :: FilePath -> URL -> Checker ()
checkInternalUrl base url = case url' of
"" -> ok url
_ -> do
config <- checkerConfig <$> ask
let dest = destinationDirectory config
dir = takeDirectory base
filePath
| "/" `isPrefixOf` url' = dest ++ url'
| otherwise = dir </> url'
exists <- checkFileExists filePath
if exists then ok url else faulty url Nothing
where
url' = stripFragments $ unEscapeString url
checkExternalUrl :: URL -> Checker ()
#ifdef CHECK_EXTERNAL
checkExternalUrl url = do
result <- requestExternalUrl url
case result of
Left (SomeException e) ->
case (cast e :: Maybe SomeAsyncException) of
Just ae -> throw ae
_ -> faulty url (Just $ showException e)
Right _ -> ok url
where
showException e = case cast e of
Just (Http.HttpExceptionRequest _ e') -> show e'
_ -> head $ words $ show e
requestExternalUrl :: URL -> Checker (Either SomeException Bool)
requestExternalUrl url = liftIO $ try $ do
mgr <- Http.newManager Http.tlsManagerSettings
runResourceT $ do
request <- Http.parseRequest url
response <- Http.http (settings request) mgr
let code = Http.statusCode (Http.responseStatus response)
return $ code >= 200 && code < 300
where
settings r = r
{ Http.method = "HEAD"
, Http.redirectCount = 10
, Http.requestHeaders = ("User-Agent", ua) : Http.requestHeaders r
}
ua = fromString $ "hakyll-check/" ++
(intercalate "." $ map show $ versionBranch Paths_hakyll.version)
#else
checkExternalUrl url = skip url Nothing
#endif
checkFileExists :: FilePath -> Checker Bool
checkFileExists filePath = liftIO $ do
file <- doesFileExist filePath
dir <- doesDirectoryExist filePath
case (file, dir) of
(True, _) -> return True
(_, True) -> doesFileExist $ filePath </> "index.html"
_ -> return False
stripFragments :: String -> String
stripFragments = takeWhile (not . flip elem ['?', '#'])