{- | The examples from the HXT tutorial at haskell.org "http://www.haskell.org/haskellwiki/HXT" -} module Main where import Text.XML.HXT.Core -- basic HXT stuff import Text.XML.HXT.XPath -- additional XPath functions import Text.XML.HXT.Curl -- Curl HTTP handler import Data.List -- auxiliary functions import Data.Maybe import System.Environment import System.Console.GetOpt() import System.Exit -- | call this program with 3 arguments, -- the function name, see list of examples, -- the input URL or file -- and the output file, - for stdout -- -- example: SimpleExamples selectAllText http://www.haskell.org/ - main :: IO () main = do argv <- getArgs (al, fct, src, dst) <- cmdlineOpts argv [rc] <- runX (application al fct src dst) if rc >= c_err then exitWith (ExitFailure 1) else exitWith ExitSuccess application :: SysConfigList -> String -> String -> String -> IOSArrow b Int application config fct src dst = configSysVars config -- set all global config options >>> readDocument [] src >>> processChildren (processRootElement fct `when` isElem) >>> writeDocument [ withIndent yes, withOutputEncoding isoLatin1 ] dst >>> getErrStatus -- | the dummy for the boring stuff of option evaluation, -- usually done with 'System.Console.GetOpt' cmdlineOpts :: [String] -> IO (SysConfigList, String, String, String) cmdlineOpts argv = return ( [ withValidate no , withParseHTML yes , withCurl [] ] , argv!!0 , argv!!1 , argv!!2 ) -- | the processing examples examples :: [ (String, IOSArrow XmlTree XmlTree) ] examples = [ ( "selectAllText", selectAllText ) , ( "selectAllTextAndAltValues", selectAllTextAndAltValues ) , ( "selectAllTextAndRealAltValues", selectAllTextAndRealAltValues ) , ( "addRefIcon", addRefIcon ) , ( "helloWorld", helloWorld ) , ( "helloWorld2", helloWorld2 ) , ( "imageTable", imageTable ) , ( "imageTable0", imageTable0 ) , ( "imageTable1", imageTable1 ) , ( "imageTable2", imageTable2 ) , ( "imageTable3", imageTable3 ) , ( "toAbsHRefs", toAbsHRefs ) , ( "toAbsRefs", toAbsRefs ) , ( "toAbsRefs1", toAbsRefs1 ) ] processRootElement :: String -> IOSArrow XmlTree XmlTree processRootElement fct = fromMaybe this . lookup fct $ examples -- | selection arrows selectAllText :: ArrowXml a => a XmlTree XmlTree selectAllText = selem "the-plain-text" [ deep isText ] -- create a root element, neccessary for wellformed XML output selectAllTextAndAltValues :: ArrowXml a => a XmlTree XmlTree selectAllTextAndAltValues = selem "the-plain-text" [ deep ( isText <+> ( isElem >>> hasName "img" >>> getAttrValue "alt" >>> mkText ) ) ] selectAllTextAndRealAltValues :: ArrowXml a => a XmlTree XmlTree selectAllTextAndRealAltValues = selem "the-plain-text" [ deep ( isText <+> ( isElem >>> hasName "img" >>> getAttrValue "alt" >>> isA significant >>> arr addBrackets >>> mkText ) ) ] where significant :: String -> Bool significant = not . all (`elem` " \n\r\t") addBrackets :: String -> String addBrackets s = " [[ " ++ s ++ " ]] " -- | transformation arrows addRefIcon :: ArrowXml a => a XmlTree XmlTree addRefIcon = processTopDown ( addImg `when` isExternalRef ) where isExternalRef = isElem >>> hasName "a" >>> hasAttr "href" >>> getAttrValue "href" >>> isA isExtRef where isExtRef = isPrefixOf "http:" addImg = replaceChildren ( getChildren <+> imgElement ) imgElement = mkelem "img" [ sattr "src" "/icons/ref.png" , sattr "alt" "external ref" ] [] -- | construction examples helloWorld :: ArrowXml a => a XmlTree XmlTree helloWorld = mkelem "html" [] [ mkelem "head" [] [ mkelem "title" [] [ txt "Hello World" ] ] , mkelem "body" [ sattr "class" "haskell" ] [ mkelem "h1" [] [ txt "Hello World" ] ] ] helloWorld2 :: ArrowXml a => a XmlTree XmlTree helloWorld2 = selem "html" [ selem "head" [ selem "title" [ txt "Hello World" ] ] , mkelem "body" [ sattr "class" "haskell" ] [ selem "h1" [ txt "Hello World" ] ] ] imageTable :: ArrowXml a => a XmlTree XmlTree imageTable = selem "html" [ selem "head" [ selem "title" [ txt "Images in Page" ] ] , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] where genTableRows = selem "tr" [ selem "td" [ getAttrValue "src" >>> mkText ] ] imageTable0 :: ArrowXml a => a XmlTree XmlTree imageTable0 = selem "html" [ pageHeader , selem "body" [ selem "h1" [ txt "Images in Page" ] , selem "table" [ collectImages >>> genTableRows ] ] ] where pageHeader = constA "