{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
-- if impl(ghc >= 7.2):
--   extensions: DeriveGeneric, StandaloneDeriving

-- |
-- Module: Data.XML.Types
-- Copyright: 2010-2011 John Millikin
-- License: MIT
--
-- Maintainer: jmillikin@gmail.com
-- Portability: portable
--
-- Basic types for representing XML.
--
-- The idea is to have a full set of appropriate types, which various XML
-- libraries can share. Instead of having equivalent-but-incompatible types
-- for every binding, parser, or client, they all share the same types can
-- can thus interoperate easily.
--
-- This library contains complete types for most parts of an XML document,
-- including the prologue, node tree, and doctype. Some basic combinators
-- are included for common tasks, including traversing the node tree and
-- filtering children.
--
module Data.XML.Types
	( -- * Types
	
	  -- ** Document prologue
	  Document (..)
	, Prologue (..)
	, Instruction (..)
	, Miscellaneous (..)
	
	-- ** Document body
	, Node (..)
	, Element (..)
	, Content (..)
	, Name (..)
	
	-- ** Doctypes
	, Doctype (..)
	, ExternalID (..)
	
	-- ** Incremental processing
	, Event (..)
	
	-- * Combinators
	
	-- ** Filters
	, isElement
	, isInstruction
	, isContent
	, isComment
	, isNamed
	
	-- ** Element traversal
	, elementChildren
	, elementContent
	, elementText
	
	-- ** Node traversal
	, nodeChildren
	, nodeContent
	, nodeText
	
	-- ** Attributes
	, hasAttribute
	, hasAttributeText
	, attributeContent
	, attributeText
	) where

import           Control.Monad ((>=>))
import           Data.Data (Data)
import           Data.Function (on)
import           Data.Maybe (isJust)
import           Data.String (IsString, fromString)
import           Data.Text (Text)
import qualified Data.Text as T
import           Data.Typeable (Typeable)
import           Control.DeepSeq (NFData(rnf))

#if MIN_VERSION_base(4,4,0)
import           GHC.Generics (Generic)
#endif

data Document = Document
	{ documentPrologue :: Prologue
	, documentRoot :: Element
	, documentEpilogue :: [Miscellaneous]
	}
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Document where
	rnf (Document a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Document
#endif

data Prologue = Prologue
	{ prologueBefore :: [Miscellaneous]
	, prologueDoctype :: Maybe Doctype
	, prologueAfter :: [Miscellaneous]
	}
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Prologue where
	rnf (Prologue a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Prologue
#endif

data Instruction = Instruction
	{ instructionTarget :: Text
	, instructionData :: Text
	}
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Instruction where
	rnf (Instruction a b) = rnf a `seq` rnf b `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Instruction
#endif

data Miscellaneous
	= MiscInstruction Instruction
	| MiscComment Text
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Miscellaneous where
	rnf (MiscInstruction a) = rnf a `seq` ()
	rnf (MiscComment a)     = rnf a `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Miscellaneous
#endif

data Node
	= NodeElement Element
	| NodeInstruction Instruction
	| NodeContent Content
	| NodeComment Text
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Node where
	rnf (NodeElement a)     = rnf a `seq` ()
	rnf (NodeInstruction a) = rnf a `seq` ()
	rnf (NodeContent a)     = rnf a `seq` ()
	rnf (NodeComment a)     = rnf a `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Node
#endif

data Element = Element
	{ elementName :: Name
	, elementAttributes :: [(Name, [Content])]
	, elementNodes :: [Node]
	}
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Element where
	rnf (Element a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Element
#endif

data Content
	= ContentText Text
	| ContentEntity Text -- ^ For pass-through parsing
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Content where
	rnf (ContentText a)   = rnf a `seq` ()
	rnf (ContentEntity a) = rnf a `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Content
#endif

-- | A fully qualified name.
--
-- Prefixes are not semantically important; they are included only to
-- simplify pass-through parsing. When comparing names with 'Eq' or 'Ord'
-- methods, prefixes are ignored.
--
-- The @IsString@ instance supports Clark notation; see
-- <http://www.jclark.com/xml/xmlns.htm> and
-- <http://infohost.nmt.edu/tcc/help/pubs/pylxml/etree-QName.html>. Use
-- the @OverloadedStrings@ language extension for very simple @Name@
-- construction:
--
-- > myname :: Name
-- > myname = "{http://example.com/ns/my-namespace}my-name"
--
data Name = Name
	{ nameLocalName :: Text
	, nameNamespace :: Maybe Text
	, namePrefix :: Maybe Text
	}
	deriving (Data, Show, Typeable)

instance Eq Name where
	(==) = (==) `on` (\x -> (nameNamespace x, nameLocalName x))

instance Ord Name where
	compare = compare `on` (\x -> (nameNamespace x, nameLocalName x))

instance IsString Name where
	fromString "" = Name T.empty Nothing Nothing
	fromString full@('{':rest) = case break (== '}') rest of
		(_, "") -> error ("Invalid Clark notation: " ++ show full)
		(ns, local) -> Name (T.pack (drop 1 local)) (Just (T.pack ns)) Nothing
	fromString local = Name (T.pack local) Nothing Nothing

instance NFData Name where
	rnf (Name a b c) = rnf a `seq` rnf b `seq` rnf c `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Name
#endif

-- | Note: due to the incredible complexity of DTDs, this type only supports
-- external subsets. I've tried adding internal subset types, but they
-- quickly gain more code than the rest of this module put together.
--
-- It is possible that some future version of this library might support
-- internal subsets, but I am no longer actively working on adding them.
data Doctype = Doctype
	{ doctypeName :: Text
	, doctypeID :: Maybe ExternalID
	}
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Doctype where
	rnf (Doctype a b) = rnf a `seq` rnf b `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Doctype
#endif

data ExternalID
	= SystemID Text
	| PublicID Text Text
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData ExternalID where
	rnf (SystemID a)   = rnf a `seq` ()
	rnf (PublicID a b) = rnf a `seq` rnf b `seq` ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic ExternalID
#endif

-- | Some XML processing tools are incremental, and work in terms of events
-- rather than node trees. The 'Event' type allows a document to be fully
-- specified as a sequence of events.
--
-- Event-based XML libraries include:
--
-- * <http://hackage.haskell.org/package/xml-enumerator>
--
-- * <http://hackage.haskell.org/package/libxml-enumerator>
--
-- * <http://hackage.haskell.org/package/expat-enumerator>
--
data Event
	= EventBeginDocument
	| EventEndDocument
	| EventBeginDoctype Text (Maybe ExternalID)
	| EventEndDoctype
	| EventInstruction Instruction
	| EventBeginElement Name [(Name, [Content])]
	| EventEndElement Name
	| EventContent Content
	| EventComment Text
	| EventCDATA Text
	deriving (Data, Eq, Ord, Show, Typeable)

instance NFData Event where
	rnf (EventBeginDoctype a b) = rnf a `seq` rnf b `seq` ()
	rnf (EventInstruction a)    = rnf a `seq` ()
	rnf (EventBeginElement a b) = rnf a `seq` rnf b `seq` ()
	rnf (EventEndElement a)     = rnf a `seq` ()
	rnf (EventContent a)        = rnf a `seq` ()
	rnf (EventComment a)        = rnf a `seq` ()
	rnf (EventCDATA a)          = rnf a `seq` ()
	rnf _                       = ()

#if MIN_VERSION_base(4,4,0)
deriving instance Generic Event
#endif

isElement :: Node -> [Element]
isElement (NodeElement e) = [e]
isElement _ = []

isInstruction :: Node -> [Instruction]
isInstruction (NodeInstruction i) = [i]
isInstruction _ = []

isContent :: Node -> [Content]
isContent (NodeContent c) = [c]
isContent _ = []

isComment :: Node -> [Text]
isComment (NodeComment t) = [t]
isComment _ = []

isNamed :: Name -> Element -> [Element]
isNamed n e = [e | elementName e == n]

elementChildren :: Element -> [Element]
elementChildren = elementNodes >=> isElement

elementContent :: Element -> [Content]
elementContent = elementNodes >=> isContent

elementText :: Element -> [Text]
elementText = elementContent >=> contentText

nodeChildren :: Node -> [Node]
nodeChildren = isElement >=> elementNodes

nodeContent :: Node -> [Content]
nodeContent = nodeChildren >=> isContent

nodeText :: Node -> [Text]
nodeText = nodeContent >=> contentText

hasAttribute :: Name -> Element -> [Element]
hasAttribute name e = [e | isJust (attributeContent name e)]

hasAttributeText :: Name -> (Text -> Bool) -> Element -> [Element]
hasAttributeText name p e = [e | maybe False p (attributeText name e)]

attributeContent :: Name -> Element -> Maybe [Content]
attributeContent name e = lookup name (elementAttributes e)

attributeText :: Name -> Element -> Maybe Text
attributeText name e = fmap contentFlat (attributeContent name e)

contentText :: Content -> [Text]
contentText (ContentText t) = [t]
contentText (ContentEntity entity) = [T.pack "&", entity, T.pack ";"]

contentFlat :: [Content] -> Text
contentFlat cs = T.concat (cs >>= contentText)