--------------------------------------------------------------------------------
-- Haskell bindings for c14n implementation in libxml                         --
--------------------------------------------------------------------------------
-- This source code is licensed under the MIT license found in the LICENSE    --
-- file in the root directory of this source tree.                            --
--------------------------------------------------------------------------------

-- | Provides a mid-level interface to libxml's implementation of c14n, i.e.
-- XML canonicalisation.
module Text.XML.C14N (
    -- * Canonicalisation
    c14n_1_0,
    c14n_exclusive_1_0,
    c14n_1_1,
    c14n,

    -- * Parsing
    xml_opt_recover,
    xml_opt_noent,
    xml_opt_dtdload,
    xml_opt_dtdattr,
    xml_opt_dtdvalid,
    xml_opt_noerror,
    xml_opt_nowarning,
    xml_opt_pedantic,
    xml_opt_noblanks,
    xml_opt_sax1,
    xml_opt_xinclude,
    xml_opt_nonet,
    xml_opt_nodict,
    xml_opt_nsclean,
    xml_opt_nocdata,
    xml_opt_noxincnode,
    xml_opt_compact,
    xml_opt_old10,
    xml_opt_nobasefix,
    xml_opt_huge,
    xml_opt_oldsax,
    xml_opt_ignore_env,
    xml_opt_big_lines,
    parseXml
) where

--------------------------------------------------------------------------------

import Control.Exception

import Data.Bits
import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as BS

import Text.XML.C14N.LibXML

import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Marshal
import Foreign.Storable
import Foreign.C.Error
import Foreign.C.Types

--------------------------------------------------------------------------------

-- | 'parseXml' @parseOpts text@ parses @text@ into an XML document using
-- libxml according to options given by @parseOpts@.
parseXml :: [CInt] -> BS.ByteString -> IO (ForeignPtr LibXMLDoc)
parseXml :: [CInt] -> ByteString -> IO (ForeignPtr LibXMLDoc)
parseXml [CInt]
opts ByteString
bin = forall a. FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
newForeignPtr FunPtr (Ptr LibXMLDoc -> IO ())
xmlFreeDoc forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<<
    (forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
bin forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
ptr, Int
len) ->
        forall a. String -> IO (Ptr a) -> IO (Ptr a)
throwErrnoIfNull String
"xmlReadMemory" forall a b. (a -> b) -> a -> b
$ Ptr CChar
-> CInt -> Ptr CChar -> Ptr CChar -> CInt -> IO (Ptr LibXMLDoc)
xmlReadMemory
            Ptr CChar
ptr (forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) forall a. Ptr a
nullPtr forall a. Ptr a
nullPtr (forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl forall a. Bits a => a -> a -> a
(.|.) CInt
0 [CInt]
opts))

-- | 'withXmlXPathNodeList' @docPtr xPathLocation continuation@ evaluates the
-- XPath location path given by @xPathLocation@ in the document context
-- pointed at by @docPtr@ and calls @continuation@ with the result.
withXmlXPathNodeList :: Ptr LibXMLDoc
                     -> BS.ByteString
                     -> (Ptr LibXMLNodeSet -> IO a)
                     -> IO a
withXmlXPathNodeList :: forall a.
Ptr LibXMLDoc -> ByteString -> (Ptr LibXMLNodeSet -> IO a) -> IO a
withXmlXPathNodeList Ptr LibXMLDoc
docPtr ByteString
expr Ptr LibXMLNodeSet -> IO a
cont =
    -- initialise a new XPath context, run the continuation with the context
    -- as argument, and then free up the context again afterwards
    forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket (Ptr LibXMLDoc -> IO (Ptr LibXMLXPathCtx)
xmlXPathNewContext Ptr LibXMLDoc
docPtr) Ptr LibXMLXPathCtx -> IO ()
xmlXPathFreeContext forall a b. (a -> b) -> a -> b
$ \Ptr LibXMLXPathCtx
ctx ->
    -- get a C string pointer for the XPath location path
    forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString ByteString
expr forall a b. (a -> b) -> a -> b
$ \Ptr CChar
strPtr ->
    -- evaluate the XPath location path and free up the resulting object
    -- after the continuation is finished; see
    -- http://xmlsoft.org/html/libxml-xpath.html#xmlXPathEval
    forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket
        ( forall a. String -> IO (Ptr a) -> IO (Ptr a)
throwErrnoIfNull String
"xmlXPathEval" forall a b. (a -> b) -> a -> b
$
            Ptr LibXMLChar -> Ptr LibXMLXPathCtx -> IO (Ptr LibXMLXPathObj)
xmlXPathEval (forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
strPtr) Ptr LibXMLXPathCtx
ctx
        )
        Ptr LibXMLXPathObj -> IO ()
xmlXPathFreeObject
        -- the XPath object structure contains the node set pointer
        -- at offset 8; see
        -- http://xmlsoft.org/html/libxml-xpath.html#xmlXPathObject
        forall a b. (a -> b) -> a -> b
$ \Ptr LibXMLXPathObj
a -> forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr LibXMLXPathObj
a Int
8 forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Ptr LibXMLNodeSet -> IO a
cont

-- | 'c14n' @parseOpts mode nsPrefixes keepComments xPathLocation input@
-- canonicalises the document given by @input@, which is parsed using options
-- specified by @parseOpts@. The @mode@ argument deteremines the
-- canonicalisation mode to use. @nsPrefixes@ gives a (potentially empty)
-- list of namespace prefixes which is used when @mode@ is
-- 'c14n_exclusive_1_0'. If @keepComments@ is 'True', all comments are kept
-- in the output. @xPathLocation@ is used to select a set of nodes that should
-- be included in the canonicalised result.
c14n :: [CInt]
     -> CInt
     -> [BS.ByteString]
     -> Bool
     -> Maybe BS.ByteString
     -> BS.ByteString
     -> IO BS.ByteString
c14n :: [CInt]
-> CInt
-> [ByteString]
-> Bool
-> Maybe ByteString
-> ByteString
-> IO ByteString
c14n [CInt]
opts CInt
mode [ByteString]
nsPrefixes Bool
keepComments Maybe ByteString
xpath ByteString
bin =
    -- parse the input xml
    [CInt] -> ByteString -> IO (ForeignPtr LibXMLDoc)
parseXml [CInt]
opts ByteString
bin forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \ForeignPtr LibXMLDoc
docPtr ->
    -- wrap the pointer we got in a foreign pointer
    forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr LibXMLDoc
docPtr forall a b. (a -> b) -> a -> b
$ \Ptr LibXMLDoc
ptr ->
    -- convert the namespace prefixes into C strings
    forall a b res.
(a -> (b -> res) -> res) -> [a] -> ([b] -> res) -> res
withMany forall a. ByteString -> (Ptr CChar -> IO a) -> IO a
BS.useAsCString [ByteString]
nsPrefixes forall a b. (a -> b) -> a -> b
$ \[Ptr CChar]
inclPtr ->
    -- turn the Haskell list of C strings into a C array,
    -- terminated by NULL
    forall a b. Storable a => a -> [a] -> (Ptr a -> IO b) -> IO b
withArray0 forall a. Ptr a
nullPtr [Ptr CChar]
inclPtr forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr CChar)
arrayPtr ->
    -- get a pointer to the node set
    forall a b c.
(a -> (Ptr b -> IO c) -> IO c)
-> Maybe a -> (Ptr b -> IO c) -> IO c
maybeWith (forall a.
Ptr LibXMLDoc -> ByteString -> (Ptr LibXMLNodeSet -> IO a) -> IO a
withXmlXPathNodeList Ptr LibXMLDoc
ptr) Maybe ByteString
xpath forall a b. (a -> b) -> a -> b
$ \Ptr LibXMLNodeSet
nsPtr ->
    -- allocate some memory for a pointer to the results
    forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr LibXMLChar)
outPtr -> do
        -- convert the option determining whether to keep comments from a
        -- Haskell boolean to a CInt
        let commentsOpt :: CInt
commentsOpt = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Enum a => a -> Int
fromEnum Bool
keepComments)
        -- cast from CChar pointers to whatever LibXMLChar is (e.g. Word8)
        let prefixesPtr :: Ptr (Ptr LibXMLChar)
            prefixesPtr :: Ptr (Ptr LibXMLChar)
prefixesPtr = forall a b. Ptr a -> Ptr b
castPtr Ptr (Ptr CChar)
arrayPtr

        -- run the canonicalisation function on the document;
        -- this function returns the number of bytes that were written
        -- to outPtr or a negative value if this fails
        CInt
numBytes <- forall a. (a -> Bool) -> String -> IO a -> IO a
throwErrnoIf (forall a. Ord a => a -> a -> Bool
<CInt
0) String
"xmlC14NDocDumpMemory" forall a b. (a -> b) -> a -> b
$
            Ptr LibXMLDoc
-> Ptr LibXMLNodeSet
-> CInt
-> Ptr (Ptr LibXMLChar)
-> CInt
-> Ptr (Ptr LibXMLChar)
-> IO CInt
xmlC14NDocDumpMemory Ptr LibXMLDoc
ptr Ptr LibXMLNodeSet
nsPtr CInt
mode Ptr (Ptr LibXMLChar)
prefixesPtr CInt
commentsOpt Ptr (Ptr LibXMLChar)
outPtr

        -- dereference the results pointer
        Ptr LibXMLChar
ptrPtr <- forall a. Storable a => Ptr a -> IO a
peek Ptr (Ptr LibXMLChar)
outPtr

        -- construct a ByteString from the C string and return it
        Ptr LibXMLChar -> Int -> IO () -> IO ByteString
BS.unsafePackCStringFinalizer
            Ptr LibXMLChar
ptrPtr (forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
numBytes) (forall a. Ptr a -> IO ()
freeXml Ptr LibXMLChar
ptrPtr)

--------------------------------------------------------------------------------