{-# LANGUAGE DeriveDataTypeable #-}

module Text.HTML.TagSoup.Options where

import Data.Typeable
import Text.HTML.TagSoup.Type
import Text.HTML.TagSoup.Entity
import Text.StringLike


-- | These options control how 'parseTags' works. The 'ParseOptions' type is usually generated by one of
--   'parseOptions', 'parseOptionsFast' or 'parseOptionsEntities', then selected fields may be overriden.
--
--   The options 'optTagPosition' and 'optTagWarning' specify whether to generate
--   'TagPosition' or 'TagWarning' elements respectively. Usually these options should be set to @False@
--   to simplify future stages, unless you rely on position information or want to give malformed HTML
--   messages to the end user.
--
--   The options 'optEntityData' and 'optEntityAttrib' control how entities, for example @ @ are handled.
--   Both take a string, and a boolean, where @True@ indicates that the entity ended with a semi-colon @;@.
--   Inside normal text 'optEntityData' will be called, and the results will be inserted in the tag stream.
--   Inside a tag attribute 'optEntityAttrib' will be called, and the first component of the result will be used
--   in the attribute, and the second component will be appended after the 'TagOpen' value (usually the second
--   component is @[]@). As an example, to not decode any entities, pass:
--
-- > parseOptions
-- >     {optEntityData=\(str,b) -> [TagText $ "&" ++ str ++ [';' | b]]
-- >     ,optEntityAttrib\(str,b) -> ("&" ++ str ++ [';' | b], [])

--   The 'optTagTextMerge' value specifies if you always want adjacent 'TagText' values to be merged.
--   Merging adjacent pieces of text has a small performance penalty, but will usually make subsequent analysis
--   simpler. Contiguous runs of characters without entities or tags will also be generated as single 'TagText'
--   values.
data ParseOptions str = ParseOptions
    {optTagPosition :: Bool -- ^ Should 'TagPosition' values be given before some items (default=False,fast=False).
    ,optTagWarning :: Bool  -- ^ Should 'TagWarning' values be given (default=False,fast=False)
    ,optEntityData :: (str,Bool) -> [Tag str] -- ^ How to lookup an entity (Bool = has ending @';'@)
    ,optEntityAttrib :: (str,Bool) -> (str,[Tag str]) -- ^ How to lookup an entity in an attribute (Bool = has ending @';'@?)
    ,optTagTextMerge :: Bool -- ^ Require no adjacent 'TagText' values (default=True,fast=False)
    }
    deriving Typeable


-- | A 'ParseOptions' structure using a custom function to lookup attributes. Any attribute
--   that is not found will be left intact, and a 'TagWarning' given (if 'optTagWarning' is set).
--
--   If you do not want to resolve any entities, simpliy pass @const Nothing@ for the lookup function.
parseOptionsEntities :: StringLike str => (str -> Maybe str) -> ParseOptions str
parseOptionsEntities lookupEntity = ParseOptions False False entityData entityAttrib True
    where
        entityData x = TagText a : b
            where (a,b) = entityAttrib x

        entityAttrib ~(x,b) = case lookupEntity x of
            Just y -> (y, [])
            Nothing -> (fromChar '&' `append` x `append` fromString [';'|b]
                       ,[TagWarning $ fromString "Unknown entity: " `append` x])


-- | The default parse options value, described in 'ParseOptions'. Equivalent to
--   @'parseOptionsEntities' 'lookupEntity'@.
parseOptions :: StringLike str => ParseOptions str
parseOptions = parseOptionsEntities $ fmap fromString . lookupEntity . toString


-- | A 'ParseOptions' structure optimised for speed, following the fast options.
parseOptionsFast :: StringLike str => ParseOptions str
parseOptionsFast = parseOptions{optTagTextMerge=False}


-- | Change the underlying string type of a 'ParseOptions' value.
fmapParseOptions :: (StringLike from, StringLike to) => ParseOptions from -> ParseOptions to
fmapParseOptions (ParseOptions a b c d e) = ParseOptions a b c2 d2 e
    where
        c2 ~(x,y) = map (fmap castString) $ c (castString x, y)
        d2 ~(x,y) = (castString r, map (fmap castString) s)
            where (r,s) = d (castString x, y)