module Text.HTML.Tagchup.Process (
Encoding, Encoded,
evalDecodeAdaptive, decodeAdaptive, decodeTagAdaptive,
getEmbeddedEncoding,
getXMLEncoding,
findMetaEncoding,
getMetaHTTPHeaders,
getHeadTags,
partAttrs,
parts,
takeBeforeMatchingClose,
takeUntilMatchingClose,
) where
import qualified Text.HTML.Tagchup.Tag as Tag
import qualified Text.HTML.Tagchup.Tag.Match as Match
import qualified Text.XML.Basic.ProcessingInstruction as PI
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.Tag as TagX
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.HTML.Basic.Character as HTMLChar
import qualified Text.HTML.Basic.String as HTMLString
import Text.HTML.Basic.String (Encoded, )
import Control.Monad.Trans.State (State, put, get, evalState, )
import Control.Monad.HT ((<=<), )
import Control.Monad (msum, guard, )
import Control.Applicative ((<|>))
import qualified Data.NonEmpty as NonEmpty
import qualified Data.List.Match as ListMatch
import qualified Data.Foldable as Fold
import Data.Traversable (traverse, )
import Data.List.HT (viewL, takeUntil, switchR, )
import Data.Maybe.HT (toMaybe, )
import Data.Maybe (fromMaybe, mapMaybe, )
type Encoding = String
evalDecodeAdaptive ::
State (Encoded -> String) a -> a
evalDecodeAdaptive =
flip evalState id
decodeAdaptive ::
(Name.Attribute name, Name.Tag name) =>
(Encoding -> Encoded -> String) ->
[Tag.T name [HTMLChar.T]] ->
State (Encoded -> String) [Tag.T name String]
decodeAdaptive getDecoder =
traverse (decodeTagAdaptive getDecoder)
decodeTagAdaptive ::
(Name.Attribute name, Name.Tag name) =>
(Encoding -> Encoded -> String) ->
Tag.T name [HTMLChar.T] ->
State (Encoded -> String) (Tag.T name String)
decodeTagAdaptive getDecoder tag0 =
do decoder <- get
let tag1 =
maybe
(fmap (HTMLString.decode decoder) tag0)
(\(name, s) ->
Tag.special name $
if TagH.cdataName == name
then decoder s
else s)
(Tag.maybeSpecial tag0)
Fold.mapM_ (put . getDecoder) $
(do openTag <- Tag.maybeOpen tag1
uncurry TagH.maybeMetaEncoding openTag <|>
uncurry TagH.maybeMetaCharset openTag)
<|>
(uncurry TagX.maybeXMLEncoding =<< Tag.maybeProcessing tag1)
return tag1
getEmbeddedEncoding ::
(Name.Attribute name, Name.Tag name) =>
[Tag.T name String] -> Maybe Encoding
getEmbeddedEncoding leadingTags =
let xmlEncoding = do
(t,_) <- viewL leadingTags
(name, PI.Known attrs) <- Tag.maybeProcessing t
guard (TagX.xmlName == name)
Attr.lookup Attr.encodingName attrs
in msum $
xmlEncoding :
map
(\tag ->
uncurry TagH.maybeMetaCharset tag <|>
uncurry TagH.maybeMetaEncoding tag)
(mapMaybe Tag.maybeOpen $ getHeadTags leadingTags)
getXMLEncoding ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name String] -> Maybe String
getXMLEncoding tags =
do (t,_) <- viewL tags
uncurry TagX.maybeXMLEncoding =<< Tag.maybeProcessing t
findMetaEncoding ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name String] -> Maybe String
findMetaEncoding =
msum .
map (uncurry TagH.maybeMetaEncoding <=< Tag.maybeOpen) .
getHeadTags
getMetaHTTPHeaders ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name string] -> [(string, string)]
getMetaHTTPHeaders =
mapMaybe (uncurry TagH.maybeMetaHTTPHeader <=< Tag.maybeOpen) .
getHeadTags
getHeadTags ::
(Name.Tag name, Name.Attribute name) =>
[Tag.T name string] -> [Tag.T name string]
getHeadTags =
takeWhile (not . Match.closeLit "head") .
drop 1 .
dropWhile (not . Match.openNameLit "head") .
takeWhile (not . Match.openNameLit "body")
partAttrs ::
(Name.Tag name) =>
(Tag.Name name -> Bool) ->
(([Attr.T name string], [Tag.T name string]) ->
([Attr.T name string], [Tag.T name string])) ->
[Tag.T name string] -> [Tag.T name string]
partAttrs p f =
concatMap
(either
(\((name,attrs),part) ->
let (newAttrs, newPart) = f (attrs, part)
in Tag.Open name newAttrs : newPart ++ [Tag.Close name])
id) .
parts p
parts ::
(Name.Tag name) =>
(Tag.Name name -> Bool) ->
[Tag.T name string] ->
[Either
((Tag.Name name, [Attr.T name string]), [Tag.T name string])
[Tag.T name string]]
parts p =
let recourse ts =
let (prefix0,suffix0) = break (Match.open p (const True)) ts
in Right prefix0 :
fromMaybe []
(do (t, suffix1) <- viewL suffix0
(name, attrs) <- Tag.maybeOpen t
let (part,suffix2) = break (Match.close (name==)) suffix1
return $ Left ((name, attrs), part) : recourse (drop 1 suffix2))
in recourse
nestDiff :: (Eq name) => TagH.Name name -> Tag.T name string -> Int
nestDiff name tag =
fromEnum (Match.open (name==) (const True) tag)
-
fromEnum (Match.close (name==) tag)
countNesting :: (a -> Int) -> [a] -> [Int]
countNesting p = NonEmpty.tail . NonEmpty.scanl (+) 0 . map p
takeBeforeMatch :: (a -> Int) -> [a] -> [a]
takeBeforeMatch p xs =
flip ListMatch.take xs $ takeWhile (>0) $ countNesting p xs
takeBeforeMatchingClose ::
(Eq name) => TagH.Name name -> [Tag.T name string] -> [Tag.T name string]
takeBeforeMatchingClose name = takeBeforeMatch $ nestDiff name
takeUntilMatch :: (a -> Int) -> [a] -> Maybe [a]
takeUntilMatch p xs =
(\ys -> switchR (Just []) (\_ y -> toMaybe (y==0) $ ListMatch.take ys xs) ys) $
takeUntil (==0) $ countNesting p xs
takeUntilMatchingClose ::
(Eq name) =>
TagH.Name name -> [Tag.T name string] -> Maybe [Tag.T name string]
takeUntilMatchingClose name = takeUntilMatch $ nestDiff name