name: html-parse
version: 0.2.0.1
synopsis: A high-performance HTML tokenizer
description:
This package provides a fast and reasonably robust HTML5 tokenizer built
upon the @attoparsec@ library. The parsing strategy is based upon the HTML5
parsing specification with few deviations.
.
The package targets similar use-cases to the venerable @tagsoup@ library,
but is significantly more efficient, achieving parsing speeds of over 50
megabytes per second on modern hardware and typical web documents.
.
For instance,
.
>>> parseTokens "<div><h1 class=widget>Hello World</h1><br/>"
[TagOpen "div" [],TagOpen "h1" [Attr "class" "widget"],
ContentText "Hello World",TagClose "h1",TagSelfClose "br" []]
homepage: http://github.com/bgamari/html-parse
license: BSD3
license-file: LICENSE
author: Ben Gamari
maintainer: ben@smart-cactus.org
copyright: (c) 2016 Ben Gamari
category: Text
build-type: Simple
cabal-version: >=1.10
tested-with: GHC==8.0.2, GHC==7.10.3, GHC==7.8.4
source-repository head
type: git
location: https://github.com/bgamari/html-parse.git
library
exposed-modules: Text.HTML.Parser, Text.HTML.Tree
ghc-options: -Wall
other-extensions: OverloadedStrings, DeriveGeneric
build-depends: base >=4.7 && <4.11,
deepseq >=1.4 && <1.5,
attoparsec >=0.13 && <0.14,
text >=1.2 && <1.3,
containers >=0.5 && <0.6
default-language: Haskell2010
benchmark bench
type: exitcode-stdio-1.0
main-is: Benchmark.hs
other-extensions: OverloadedStrings, DeriveGeneric
build-depends: base,
deepseq,
attoparsec,
text,
tagsoup >= 0.13,
criterion >= 1.1
default-language: Haskell2010
test-suite spec
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: Spec.hs
other-modules: Text.HTML.ParserSpec, Text.HTML.TreeSpec
ghc-options: -Wall -with-rtsopts=-T
build-depends: base,
containers,
hspec,
hspec-discover,
html-parse,
QuickCheck,
quickcheck-instances,
string-conversions,
text
default-language: Haskell2010