-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.DTDValidation.TypeDefs
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   This module provides functions for validating the DTD of XML documents
   represented as XmlTree.

   Unlike other popular XML validation tools the validation process returns
   a list of errors instead of aborting after the first error was found.


   Unlike validation of the document, the DTD branch is traversed four times:

    - Validation of Notations

    - Validation of Unparsed Entities

    - Validation of Element declarations

    - Validation of Attribute declarations

-}

-- ------------------------------------------------------------

module Text.XML.HXT.DTDValidation.DTDValidation
    ( removeDoublicateDefs
    , validateDTD
    )
where

import Text.XML.HXT.DTDValidation.TypeDefs
import Text.XML.HXT.DTDValidation.AttributeValueValidation

-- |
-- Validate a DTD.
--
--    - returns : a functions which takes the DTD subset of the XmlTree, checks
--                  if the DTD is valid and returns a list of errors

validateDTD :: XmlArrow
validateDTD -- dtdPart
    = isDTDDoctype
      `guards`
      ( listA getChildren
        >>>
        ( validateParts $<< (getNotationNames &&& getElemNames) )
      )
    where
    validateParts notationNames elemNames
        = validateNotations
          <+>
          validateEntities notationNames
          <+>
          validateElements elemNames
          <+>
          validateAttributes elemNames notationNames

    getNotationNames    :: LA [XmlTree] [String]
    getNotationNames    = listA $ unlistA >>> isDTDNotation >>> getDTDAttrValue a_name

    getElemNames        :: LA [XmlTree] [String]
    getElemNames        = listA $ unlistA >>> isDTDElement  >>> getDTDAttrValue a_name

-- ------------------------------------------------------------

checkName       :: String -> SLA [String] XmlTree XmlTree -> SLA [String] XmlTree XmlTree
checkName name msg
    = ifA ( getState
            >>>
            isA (name `elem`)
          )
      msg
      (nextState (name:) >>> none)

-- ------------------------------------------------------------

-- |
-- Validation of Notations, checks if all notation names are unique.
-- Validity constraint: Unique Notation Name (4.7 \/ p.44 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - returns : a list of errors

validateNotations :: LA XmlTrees XmlTree
validateNotations
    = fromSLA [] ( unlistA
                   >>>
                   isDTDNotation
                   >>>
                   (checkForUniqueNotation $< getDTDAttrl)
                 )
      where
      checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree
      checkForUniqueNotation al
          = checkName name $
            err ( "Notation "++ show name ++ " was already specified." )
          where
          name = dtd_name al

-- |
-- Validation of Entities.
--
-- 1. Issues a warning if entities are declared multiple times.
--
--    Optional warning: (4.2 \/ p.35 in Spec)
--
--
-- 2. Validates that a notation is declared for an unparsed entity.
--
--    Validity constraint: Notation Declared (4.2.2 \/ p.36 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter notationNames :  list of all notation names declared in the DTD
--
--    - returns : a list of errors

validateEntities        :: [String] -> LA XmlTrees XmlTree
validateEntities notationNames
    = ( fromSLA [] ( unlistA
                     >>>
                     isDTDEntity
                     >>>
                     (checkForUniqueEntity $< getDTDAttrl)
                   )
      )
      <+>
      ( unlistA
        >>>
        isUnparsedEntity
        >>>
        (checkNotationDecl $< getDTDAttrl)
      )
      where

      -- Check if entities are declared multiple times

      checkForUniqueEntity      :: Attributes -> SLA [String] XmlTree XmlTree
      checkForUniqueEntity al
          = checkName name $
            warn ( "Entity "++ show name ++ " was already specified. " ++
                    "First declaration will be used." )
          where
          name = dtd_name al

      -- Find unparsed entities for which no notation is specified

      checkNotationDecl         :: Attributes -> XmlArrow
      checkNotationDecl al
          | notationName `elem` notationNames
              = none
          | otherwise
              = err ( "The notation " ++ show notationName ++ " must be declared " ++
                      "when referenced in the unparsed entity declaration for " ++
                      show upEntityName ++ "."
                    )
          where
          notationName = lookup1 k_ndata al
          upEntityName = dtd_name  al

-- |
-- Validation of Element declarations.
--
-- 1. Validates that an element is not declared multiple times.
--
--    Validity constraint: Unique Element Type Declaration (3.2 \/ p.21 in Spec)
--
--
-- 2. Validates that an element name only appears once in a mixed-content declaration.
--
--    Validity constraint: No Duplicate Types (3.2 \/ p.21 in Spec)
--
--
-- 3. Issues a warning if an element mentioned in a content model is not declared in the
--    DTD.
--
--    Optional warning: (3.2 \/ p.21 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter elemNames :  list of all element names declared in the DTD
--
--    - returns : a list of errors


validateElements        :: [String] -> LA XmlTrees XmlTree
validateElements elemNames -- dtdPart
    = ( fromSLA [] ( unlistA
                     >>>
                     isDTDElement
                     >>>
                     (checkForUniqueElement $< getDTDAttrl)
                   )
      )
      <+>
      ( unlistA
        >>>
        isMixedContentElement
        >>>
        (checkMixedContent $< getDTDAttrl)
      )
      <+>
      ( unlistA
        >>>
        isDTDElement
        >>>
        (checkContentModel elemNames $< getDTDAttrl)
      )
      where

      -- Validates that an element is not declared multiple times

      checkForUniqueElement :: Attributes -> SLA [String] XmlTree XmlTree
      checkForUniqueElement al
          = checkName name $
            err ( "Element type " ++ show name ++
                  " must not be declared more than once." )
          where
          name = dtd_name al

      -- Validates that an element name only appears once in a mixed-content declaration

      checkMixedContent :: Attributes -> XmlArrow
      checkMixedContent al
          = fromSLA [] ( getChildren
                         >>>
                         getChildren
                         >>>
                         isDTDName
                         >>>
                         (check $< getDTDAttrl)
                       )
            where
            elemName = dtd_name al
            check al'
                = checkName name $
                  err ( "The element type " ++ show name ++
                         " was already specified in the mixed-content model of the element declaration " ++
                         show elemName ++ "." )
                where
                name = dtd_name al'

      -- Issues a warning if an element mentioned in a content model is not
      -- declared in the DTD.
      checkContentModel :: [String] -> Attributes -> XmlArrow
      checkContentModel names al
          | cm `elem` [v_children, v_mixed]
              = getChildren >>> checkContent
          | otherwise
              = none
          where
          elemName = dtd_name al
          cm       = dtd_type al

          checkContent :: XmlArrow
          checkContent
              = choiceA
                [ isDTDName    :-> ( checkName' $< getDTDAttrl )
                , isDTDContent :-> ( getChildren >>> checkContent )
                , this         :-> none
                ]
              where
              checkName' al'
                  | childElemName `elem` names
                      = none
                  | otherwise
                      = warn ( "The element type "++ show childElemName ++
                               ", used in content model of element "++ show elemName ++
                               ", is not declared."
                             )
                  where
                  childElemName = dtd_name al'

-- |
-- Validation of Attribute declarations.
--
-- (1) Issues a warning if an attribute is declared for an element type not itself
--    decared.
--
--    Optinal warning: (3.3 \/ p. 24 in Spec)
--
--
-- 2. Issues a warning if more than one definition is provided for the same
--    attribute of a given element type. Fist declaration is binding, later
--    definitions are ignored.
--
--    Optional warning: (3.3 \/ p.24 in Spec)
--
--
-- 3. Issues a warning if the same Nmtoken occures more than once in enumerated
--    attribute types of a single element type.
--
--    Optional warning: (3.3.1 \/ p.27 in Spec)
--
--
-- 4. Validates that an element type has not more than one ID attribute defined.
--
--    Validity constraint: One ID per Element Type (3.3.1 \/ p.26 in Spec)
--
--
-- 5. Validates that an element type has not more than one NOTATION attribute defined.
--
--    Validity constraint: One Notation per Element Type (3.3.1 \/ p.27 in Spec)
--
--
-- 6. Validates that an ID attributes has the type #IMPLIED or #REQUIRED.
--
--    Validity constraint: ID Attribute Default (3.3.1 \/ p.26 in Spec)
--
--
-- 7. Validates that all referenced notations are declared.
--
--    Validity constraint: Notation Attributes (3.3.1 \/ p.27 in Spec)
--
--
-- 8. Validates that notations are not declared for EMPTY elements.
--
--    Validity constraint: No Notation on Empty Element (3.3.1 \/p.27 in Spec)
--
--
-- 9. Validates that the default value matches the lexical constraints of it's type.
--
--    Validity constraint: Attribute default legal (3.3.2 \/ p.28 in Spec)
--
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter elemNames :  list of all element names declared in the DTD
--
--    - 3.parameter notationNames :  list of all notation names declared in the DTD
--
--    - returns : a list of errors

validateAttributes :: [String] -> [String] -> LA XmlTrees XmlTree
validateAttributes elemNames notationNames
    = -- 1. Find attributes for which no elements are declared
      ( runCheck this (checkDeclaredElements elemNames) )
      <+>
      -- 2. Find attributes which are declared more than once
      ( runNameCheck this checkForUniqueAttributeDeclaration )
      <+>
      -- 3. Find enumerated attribute types which nmtokens are declared more than once
      ( runCheck (isEnumAttrType `orElse` isNotationAttrType) checkEnumeratedTypes )
      <+>
      -- 4. Validate that there exists only one ID attribute for an element
      ( runNameCheck isIdAttrType checkForUniqueId )
      <+>
      -- 5. Validate that there exists only one NOTATION attribute for an element
      ( runNameCheck isNotationAttrType checkForUniqueNotation )
      <+>
      -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED
      ( runCheck isIdAttrType checkIdKindConstraint )
      <+>
      -- 7. Validate that all referenced notations are declared
      ( runCheck isNotationAttrType (checkNotationDeclaration notationNames) )
      <+>
      -- 8. Validate that notations are not declared for EMPTY elements
      ( checkNoNotationForEmptyElements $< listA ( unlistA
                                                   >>>
                                                   isEmptyElement
                                                   >>>
                                                   getDTDAttrValue a_name
                                                 )
      )
      <+>
      -- 9. Validate that the default value matches the lexical constraints of it's type
      ( checkDefaultValueTypes $< this )

      where
      -- ------------------------------------------------------------
      -- control structures

      runCheck select check
          = unlistA >>> isDTDAttlist
            >>>
            select
            >>>
            (check $< getDTDAttrl)

      runNameCheck select check
          = fromSLA [] $ runCheck select check

      --------------------------------------------------------------------------

      -- 1. Find attributes for which no elements are declared

      checkDeclaredElements :: [String] -> Attributes -> XmlArrow
      checkDeclaredElements elemNames' al
          | en `elem` elemNames'
              = none
          | otherwise
              = warn ( "The element type \""++ en ++ "\" used in dclaration "++
                       "of attribute \""++ an ++"\" is not declared."
                     )
          where
          en = dtd_name al
          an = dtd_value al

      --------------------------------------------------------------------------

      -- 2. Find attributes which are declared more than once

      checkForUniqueAttributeDeclaration ::  Attributes -> SLA [String] XmlTree XmlTree
      checkForUniqueAttributeDeclaration al
          = checkName name $
            warn ( "Attribute \""++ aname ++"\" for element type \""++
                   ename ++"\" is already declared. First "++
                   "declaration will be used." )
          where
          ename = dtd_name al
          aname = dtd_value al
          name  = ename ++ "|" ++ aname

      --------------------------------------------------------------------------

      -- 3. Find enumerated attribute types which nmtokens are declared more than once

      checkEnumeratedTypes :: Attributes -> XmlArrow
      checkEnumeratedTypes al
          = fromSLA [] ( getChildren
                         >>>
                         isDTDName
                         >>>
                         (checkForUniqueType $< getDTDAttrl)
                       )
          where
          checkForUniqueType :: Attributes -> SLA [String] XmlTree XmlTree
          checkForUniqueType al'
              = checkName nmtoken $
                warn ( "Nmtoken \""++ nmtoken ++"\" should not "++
                       "occur more than once in attribute \""++ dtd_value al ++
                       "\" for element \""++ dtd_name al ++ "\"." )
              where
              nmtoken = dtd_name al'

      --------------------------------------------------------------------------

      -- 4. Validate that there exists only one ID attribute for an element

      checkForUniqueId :: Attributes -> SLA [String] XmlTree XmlTree
      checkForUniqueId al
          = checkName ename $
            err ( "Element \""++ ename ++ "\" already has attribute of type "++
                  "ID, another attribute \""++ dtd_value al ++ "\" of type ID is "++
                  "not permitted." )
          where
          ename = dtd_name al

      --------------------------------------------------------------------------

      -- 5. Validate that there exists only one NOTATION attribute for an element

      checkForUniqueNotation :: Attributes -> SLA [String] XmlTree XmlTree
      checkForUniqueNotation al
          = checkName ename $
            err ( "Element \""++ ename ++ "\" already has attribute of type "++
                  "NOTATION, another attribute \""++ dtd_value al ++ "\" of type NOTATION "++
                  "is not permitted." )
          where
          ename = dtd_name al

      --------------------------------------------------------------------------

      -- 6. Validate that ID attributes have the type #IMPLIED or #REQUIRED

      checkIdKindConstraint :: Attributes -> XmlArrow
      checkIdKindConstraint al
          | attKind `elem` [k_implied, k_required]
              = none
          | otherwise
              = err ( "ID attribute \""++ dtd_value al ++"\" must have a declared default "++
                      "of \"#IMPLIED\" or \"REQUIRED\"")
          where
          attKind = dtd_kind al


      --------------------------------------------------------------------------

      -- 7. Validate that all referenced notations are declared

      checkNotationDeclaration :: [String] -> Attributes -> XmlArrow
      checkNotationDeclaration notations al
          = getChildren
            >>>
            isDTDName
            >>>
            (checkNotations $< getDTDAttrl)
          where
          checkNotations :: Attributes -> XmlArrow
          checkNotations al'
              | notation `elem` notations
                  = none
              | otherwise
                  = err ( "The notation \""++ notation ++"\" must be declared when "++
                          "referenced in the notation type list for attribute \""++ dtd_value al ++
                          "\" of element \""++ dtd_name al ++"\"."
                        )
              where
              notation = dtd_name al'

      --------------------------------------------------------------------------

      -- 8. Validate that notations are not declared for EMPTY elements

      checkNoNotationForEmptyElements :: [String] -> LA XmlTrees XmlTree
      checkNoNotationForEmptyElements emptyElems
          = unlistA
            >>>
            isDTDAttlist
            >>>
            isNotationAttrType
            >>>
            (checkNoNotationForEmptyElement $< getDTDAttrl)
          where
          checkNoNotationForEmptyElement :: Attributes -> XmlArrow
          checkNoNotationForEmptyElement al
              | ename `elem` emptyElems
                  = err ( "Attribute \""++ dtd_value al ++"\" of type NOTATION must not be "++
                          "declared on the element \""++ ename ++"\" declared EMPTY."
                        )
              | otherwise
                  = none
              where
              ename = dtd_name al

      --------------------------------------------------------------------------

      -- 9. Validate that default values meet the lexical constraints of the attribute types

      checkDefaultValueTypes :: XmlTrees -> LA XmlTrees XmlTree
      checkDefaultValueTypes dtdPart'
          = unlistA >>> isDTDAttlist
            >>>
            isDefaultAttrKind
            >>>
            (checkAttributeValue dtdPart' $< this)

-- ------------------------------------------------------------

-- |
-- Removes doublicate declarations from the DTD, which first declaration is
-- binding. This is the case for ATTLIST and ENTITY declarations.
--
--    - returns : A function that replaces the children of DOCTYPE nodes by a list
--               where all multiple declarations are removed.

removeDoublicateDefs :: XmlArrow
removeDoublicateDefs
    = replaceChildren
      ( fromSLA [] ( getChildren
                     >>>
                     choiceA [ isDTDAttlist :-> (removeDoubleAttlist $< getDTDAttrl)
                             , isDTDEntity  :-> (removeDoubleEntity  $< getDTDAttrl)
                             , this         :-> this
                             ]
                   )
      )
      `when`
      isDTDDoctype
    where
    checkName' n'
        = ifA ( getState
                >>>
                isA (n' `elem`)
              )
          none
          (this >>> perform (nextState (n':)))

    removeDoubleAttlist :: Attributes -> SLA [String] XmlTree XmlTree
    removeDoubleAttlist al
        = checkName' elemAttr
        where
        elemAttr = elemName ++ "|" ++ attrName
        attrName = dtd_value al
        elemName = dtd_name al

    removeDoubleEntity  :: Attributes -> SLA [String] XmlTree XmlTree
    removeDoubleEntity al
        = checkName' (dtd_name al)

-- ------------------------------------------------------------