{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
-- | Streaming parsers for the Atom 1.0 standard.
module Text.Atom.Conduit.Parse
( -- * Top-level
atomFeed
-- * Elements
, atomEntry
, atomContent
, atomCategory
, atomLink
, atomGenerator
, atomSource
-- * Constructs
, atomPerson
, atomText
) where
-- {{{ Imports
import Blaze.ByteString.Builder (toByteString)
import Conduit (foldC, headC, headDefC, sinkList)
import Control.Applicative hiding (many)
import Control.Exception.Safe as Exception
import Control.Monad hiding (foldM)
import Control.Monad.Fix
import Data.Conduit
import Data.Maybe
import Data.Monoid
import Data.MonoTraversable
import Data.NonNull (NonNull, fromNullable, toNullable)
import Data.Text as Text (Text, unpack)
import Data.Text.Encoding
import Data.Time.Clock
import Data.Time.LocalTime
import Data.Time.RFC3339
import Data.XML.Types
import Lens.Simple
import Prelude hiding (last, lookup)
import Text.Atom.Types
import Text.XML.Stream.Parse
import qualified Text.XML.Stream.Render as Render
import URI.ByteString
-- }}}
-- {{{ Util
data AtomException = InvalidDate Text
| InvalidURI URIParseError
| MissingElement Text
| NullElement
deriving instance Eq AtomException
deriving instance Show AtomException
instance Exception AtomException where
displayException (InvalidDate t) = "Invalid date: " ++ unpack t
displayException (InvalidURI e) = "Invalid URI reference: " ++ show e
displayException (MissingElement t) = "Missing element: " ++ unpack t
displayException NullElement = "Null element"
asURIReference :: MonadThrow m => Text -> m AtomURI
asURIReference t = case (parseURI' t, parseRelativeRef' t) of
(Right u, _) -> return $ AtomURI u
(_, Right u) -> return $ AtomURI u
(Left _, Left e) -> throwM $ InvalidURI e
where parseURI' = parseURI laxURIParserOptions . encodeUtf8
parseRelativeRef' = parseRelativeRef laxURIParserOptions . encodeUtf8
asNonNull :: (MonoFoldable a, MonadThrow m) => a -> m (NonNull a)
asNonNull = liftMaybe NullElement . fromNullable
liftMaybe :: (MonadThrow m, Exception e) => e -> Maybe a -> m a
liftMaybe e = maybe (throw e) return
-- | Like 'tagName' but ignores the namespace.
tagName' :: MonadThrow m => Text -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b)
tagName' t = tag' (matching $ \n -> nameLocalName n == t && nameNamespace n == Just "http://www.w3.org/2005/Atom")
-- | Tag which content is a date-time that follows RFC 3339 format.
tagDate :: MonadThrow m => Text -> ConduitM Event o m (Maybe UTCTime)
tagDate name = tagIgnoreAttrs' name $ do
text <- content
zonedTimeToUTC <$> liftMaybe (InvalidDate text) (parseTimeRFC3339 text)
-- | Like 'tagName'' but ignores all attributes.
tagIgnoreAttrs' :: MonadThrow m => Text -> ConduitM Event o m a -> ConduitM Event o m (Maybe a)
tagIgnoreAttrs' name handler = tagName' name ignoreAttrs $ const handler
xhtmlContent :: MonadThrow m => ConduitM Event o m Text
xhtmlContent = fmap (decodeUtf8 . toByteString) $ many_ takeAnyTreeContent =$= Render.renderBuilder def =$= foldC
projectC :: Monad m => Fold a a' b b' -> Conduit a m b
projectC prism = fix $ \recurse -> do
item <- await
case (item, item ^? (_Just . prism)) of
(_, Just a) -> yield a >> recurse
(Just _, _) -> recurse
_ -> return ()
headRequiredC :: MonadThrow m => Text -> Consumer a m a
headRequiredC e = liftMaybe (MissingElement e) =<< headC
atomId :: MonadThrow m => ConduitM Event o m (Maybe Text)
atomId = tagIgnoreAttrs' "id" content
atomIcon, atomLogo :: MonadThrow m => ConduitM Event o m (Maybe AtomURI)
atomIcon = tagIgnoreAttrs' "icon" $ content >>= asURIReference
atomLogo = tagIgnoreAttrs' "logo" $ content >>= asURIReference
-- }}}
data PersonPiece = PersonName (NonNull Text)
| PersonEmail Text
| PersonUri AtomURI
makeTraversals ''PersonPiece
-- | Parse an Atom person construct.
-- Example:
--
-- >
-- > John Doe
-- > JohnDoe@example.com
-- > http://example.com/~johndoe
-- >
atomPerson :: MonadThrow m => Text -> ConduitM Event o m (Maybe AtomPerson)
atomPerson name = tagIgnoreAttrs' name $ (manyYield' (choose piece) =$= parser) <* many ignoreAnyTreeContent where
parser = getZipConduit $ AtomPerson
<$> ZipConduit (projectC _PersonName =$= headRequiredC "Missing or invalid element")
<*> ZipConduit (projectC _PersonEmail =$= headDefC "")
<*> ZipConduit (projectC _PersonUri =$= headC)
piece = [ fmap PersonName <$> tagIgnoreAttrs' "name" (content >>= asNonNull)
, fmap PersonEmail <$> tagIgnoreAttrs' "email" content
, fmap PersonUri <$> tagIgnoreAttrs' "uri" (content >>= asURIReference)
]
-- | Parse an @atom:category@ element.
-- Example:
--
-- >
atomCategory :: MonadThrow m => ConduitM Event o m (Maybe AtomCategory)
atomCategory = tagName' "category" categoryAttrs $ \(t, s, l) -> do
term <- asNonNull t
return $ AtomCategory term s l
where categoryAttrs = (,,) <$> requireAttr "term"
<*> (requireAttr "scheme" <|> pure mempty)
<*> (requireAttr "label" <|> pure mempty)
<* ignoreAttrs
-- | Parse an @atom:content@ element.
atomContent :: MonadThrow m => ConduitM Event o m (Maybe AtomContent)
atomContent = tagName' "content" contentAttrs handler where
contentAttrs = (,) <$> optional (requireAttr "type") <*> optional (requireAttr "src" >>= asURIReference) <* ignoreAttrs
handler (Just "xhtml", _) = AtomContentInlineXHTML <$> force "
" (tagIgnoreAttrs "{http://www.w3.org/1999/xhtml}div" xhtmlContent)
handler (ctype, Just uri) = return $ AtomContentOutOfLine (fromMaybe mempty ctype) uri
handler (Just "html", _) = AtomContentInlineText TypeHTML <$> content
handler (Nothing, _) = AtomContentInlineText TypeText <$> content
handler (Just ctype, _) = AtomContentInlineOther ctype <$> content
-- | Parse an @atom:link@ element.
-- Examples:
--
-- >
--
-- >
atomLink :: MonadThrow m => ConduitM Event o m (Maybe AtomLink)
atomLink = tagName' "link" linkAttrs $ \(href, rel, ltype, lang, title, length') ->
return $ AtomLink href rel ltype lang title length'
where linkAttrs = (,,,,,) <$> (requireAttr "href" >>= asURIReference)
<*> (requireAttr "rel" <|> pure mempty)
<*> (requireAttr "type" <|> pure mempty)
<*> (requireAttr "hreflang" <|> pure mempty)
<*> (requireAttr "title" <|> pure mempty)
<*> (requireAttr "length" <|> pure mempty)
<* ignoreAttrs
-- | Parse an Atom text construct.
-- Examples:
--
-- > AT&T bought by SBC!
--
-- >
-- > AT&T bought <b>by SBC</b>!
-- >
--
-- >
-- >
-- > AT&T bought by SBC!
-- >
-- >
atomText :: MonadThrow m => Text -> ConduitM Event o m (Maybe AtomText)
atomText name = tagName' name (optional (requireAttr "type") <* ignoreAttrs) handler
where handler (Just "xhtml") = AtomXHTMLText <$> force "