{-# LANGUAGE OverloadedStrings #-}

module Readability
  ( -- * Data types
    Article (..),

    -- * Construction
    fromByteString,
    fromDocument,
    fromFile,
    fromText,
  )
where

import Data.ByteString.Lazy (ByteString)
import Data.Text.Lazy (Text)
import Readability.Internal as I
import Readability.Types
import qualified Text.HTML.DOM as DOM
import Text.XML (Document)

-- | Extract an 'Article' from an already-parsed HTML 'Document'.
--
-- Delegates to @fromDocument'@ with @strictSettings@; the result is
-- 'Nothing' whenever that call yields 'Nothing'.
fromDocument :: Document -> Maybe Article
fromDocument doc = fromDocument' strictSettings doc

-- | Extract an 'Article' from a parsed HTML 'Document' using the supplied
-- 'Settings'.
--
-- Runs @I.summary@ with the given settings and wraps a successful result in
-- the 'Article' constructor; a 'Nothing' from @I.summary@ is passed through
-- unchanged.
fromDocument' :: Settings -> Document -> Maybe Article
fromDocument' settings = fmap Article . I.summary settings

-- | Extract an 'Article' from HTML held in a lazy 'ByteString'.
--
-- The bytes are parsed into a 'Document' with @DOM.parseLBS@ and then handed
-- to 'fromDocument'.
fromByteString :: ByteString -> Maybe Article
fromByteString bytes = fromDocument (DOM.parseLBS bytes)

-- | Extract an 'Article' from the HTML file at the given path.
--
-- The file is read and parsed into a 'Document' with @DOM.readFile@, and the
-- resulting document is passed to 'fromDocument'.
fromFile :: FilePath -> IO (Maybe Article)
fromFile path = do
  doc <- DOM.readFile path
  pure (fromDocument doc)

-- | Extract an 'Article' from HTML held in lazy 'Text'.
--
-- The text is parsed into a 'Document' with @DOM.parseLT@ and then handed to
-- 'fromDocument'.
fromText :: Text -> Maybe Article
fromText html = fromDocument (DOM.parseLT html)

-- | Default 'Settings' used by 'fromDocument'.
--
-- The @reRemoveAttributes@ predicate is 'True' only for @"class"@.
-- NOTE(review): presumably this selects the attributes that
-- "Readability.Internal" strips from the output -- confirm there.
strictSettings :: Settings
strictSettings =
  Settings {reRemoveAttributes = \attr -> attr `elem` ["class"]}

{-
looseSettings :: Settings
looseSettings =
  Settings
    { reRemoveAttributes = const False
    }
-}