module Hakyll.Web.Html
(
withTags
, demoteHeaders
, getUrls
, withUrls
, toUrl
, toSiteRoot
, isExternal
, stripTags
, escapeHtml
) where
import Data.Char (digitToInt, intToDigit,
isDigit, toLower)
import Data.List (isPrefixOf)
import qualified Data.Set as S
import System.FilePath.Posix (joinPath, splitPath,
takeDirectory)
import Text.Blaze.Html (toHtml)
import Text.Blaze.Html.Renderer.String (renderHtml)
import qualified Text.HTML.TagSoup as TS
import Network.URI (isUnreserved, escapeURIString)
withTags :: (TS.Tag String -> TS.Tag String) -> String -> String
withTags f = renderTags' . map f . parseTags'
demoteHeaders :: String -> String
demoteHeaders = withTags $ \tag -> case tag of
TS.TagOpen t a -> TS.TagOpen (demote t) a
TS.TagClose t -> TS.TagClose (demote t)
t -> t
where
demote t@['h', n]
| isDigit n = ['h', intToDigit (min 6 $ digitToInt n + 1)]
| otherwise = t
demote t = t
isUrlAttribute :: String -> Bool
isUrlAttribute = (`elem` ["src", "href", "data", "poster"])
getUrls :: [TS.Tag String] -> [String]
getUrls tags = [v | TS.TagOpen _ as <- tags, (k, v) <- as, isUrlAttribute k]
withUrls :: (String -> String) -> String -> String
withUrls f = withTags tag
where
tag (TS.TagOpen s a) = TS.TagOpen s $ map attr a
tag x = x
attr (k, v) = (k, if isUrlAttribute k then f v else v)
renderTags' :: [TS.Tag String] -> String
renderTags' = TS.renderTagsOptions TS.RenderOptions
{ TS.optRawTag = (`elem` ["script", "style"]) . map toLower
, TS.optMinimize = (`S.member` minimize) . map toLower
, TS.optEscape = id
}
where
minimize = S.fromList
[ "area", "br", "col", "embed", "hr", "img", "input", "meta", "link"
, "param"
]
parseTags' :: String -> [TS.Tag String]
parseTags' = TS.parseTagsOptions (TS.parseOptions :: TS.ParseOptions String)
{ TS.optEntityData = \(str, b) -> [TS.TagText $ "&" ++ str ++ [';' | b]]
, TS.optEntityAttrib = \(str, b) -> ("&" ++ str ++ [';' | b], [])
}
toUrl :: FilePath -> String
toUrl url = case url of
('/' : xs) -> '/' : sanitize xs
xs -> '/' : sanitize xs
where
sanitize = escapeURIString (\c -> c == '/' || isUnreserved c)
toSiteRoot :: String -> String
toSiteRoot = emptyException . joinPath . map parent
. filter relevant . splitPath . takeDirectory
where
parent = const ".."
emptyException [] = "."
emptyException x = x
relevant "." = False
relevant "/" = False
relevant "./" = False
relevant _ = True
isExternal :: String -> Bool
isExternal url = any (flip isPrefixOf url) ["http://", "https://", "//"]
stripTags :: String -> String
stripTags [] = []
stripTags ('<' : xs) = stripTags $ drop 1 $ dropWhile (/= '>') xs
stripTags (x : xs) = x : stripTags xs
escapeHtml :: String -> String
escapeHtml = renderHtml . toHtml