module HaskellWorks.Data.Xml.Conduit
( blankedXmlToInterestBits
, byteStringToBits
, blankedXmlToBalancedParens
, blankedXmlToBalancedParens2
, compressWordAsBit
, interestingWord8s
, isInterestingWord8
) where
import Control.Monad
import Data.Array.Unboxed as A
import Data.ByteString as BS
import Data.Conduit
import Data.Word
import Data.Word8
import HaskellWorks.Data.Bits.BitWise
import Prelude as P
import qualified Data.Bits as BITS
interestingWord8s :: A.UArray Word8 Word8
interestingWord8s = A.array (0, 255) [
(w, if w == _bracketleft
|| w == _braceleft
|| w == _parenleft
|| w == _bracketleft
|| w == _less
|| w == _a || w == _v || w == _t
then 1
else 0)
| w <- [0 .. 255]]
isInterestingWord8 :: Word8 -> Word8
isInterestingWord8 b = interestingWord8s ! b
blankedXmlToInterestBits :: Monad m => Conduit BS.ByteString m BS.ByteString
blankedXmlToInterestBits = blankedXmlToInterestBits' ""
blankedXmlToInterestBits' :: Monad m => BS.ByteString -> Conduit BS.ByteString m BS.ByteString
blankedXmlToInterestBits' rs = do
mbs <- await
case mbs of
Just bs -> do
let cs = if BS.length rs /= 0 then BS.concat [rs, bs] else bs
let lencs = BS.length cs
let q = lencs `quot` 8
let (ds, es) = BS.splitAt (q * 8) cs
let (fs, _) = BS.unfoldrN q gen ds
yield fs
blankedXmlToInterestBits' es
Nothing -> do
let lenrs = BS.length rs
let q = lenrs + 7 `quot` 8
yield (fst (BS.unfoldrN q gen rs))
where gen :: ByteString -> Maybe (Word8, ByteString)
gen as = if BS.length as == 0
then Nothing
else Just ( BS.foldr' (\b m -> (interestingWord8s ! b) .|. (m .<. 1)) 0 (BS.take 8 as)
, BS.drop 8 as
)
blankedXmlToBalancedParens :: Monad m => Conduit BS.ByteString m Bool
blankedXmlToBalancedParens = do
mbs <- await
case mbs of
Just bs -> blankedXmlToBalancedParens' bs
Nothing -> return ()
blankedXmlToBalancedParens' :: Monad m => BS.ByteString -> Conduit BS.ByteString m Bool
blankedXmlToBalancedParens' bs = case BS.uncons bs of
Just (c, cs) -> do
case c of
d | d == _less -> yield True
d | d == _greater -> yield False
d | d == _bracketleft -> yield True
d | d == _bracketright -> yield False
d | d == _parenleft -> yield True
d | d == _parenright -> yield False
d | d == _a -> yield True >> yield False
d | d == _v -> yield True >> yield False
d | d == _t -> yield True >> yield False
_ -> return ()
blankedXmlToBalancedParens' cs
Nothing -> return ()
repartitionMod8 :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString)
repartitionMod8 aBS bBS = (BS.take cLen abBS, BS.drop cLen abBS)
where abBS = BS.concat [aBS, bBS]
abLen = BS.length abBS
cLen = (abLen `div` 8) * 8
compressWordAsBit :: Monad m => Conduit BS.ByteString m BS.ByteString
compressWordAsBit = compressWordAsBit' BS.empty
compressWordAsBit' :: Monad m => BS.ByteString -> Conduit BS.ByteString m BS.ByteString
compressWordAsBit' aBS = do
mbBS <- await
case mbBS of
Just bBS -> do
let (cBS, dBS) = repartitionMod8 aBS bBS
let (cs, _) = BS.unfoldrN (BS.length cBS + 7 `div` 8) gen cBS
yield cs
compressWordAsBit' dBS
Nothing -> do
let (cs, _) = BS.unfoldrN (BS.length aBS + 7 `div` 8) gen aBS
yield cs
where gen :: ByteString -> Maybe (Word8, ByteString)
gen xs = if BS.length xs == 0
then Nothing
else Just ( BS.foldr' (\b m -> ((b .&. 1) .|. (m .<. 1))) 0 (BS.take 8 xs)
, BS.drop 8 xs
)
blankedXmlToBalancedParens2 :: Monad m => Conduit BS.ByteString m BS.ByteString
blankedXmlToBalancedParens2 = do
mbs <- await
case mbs of
Just bs -> do
let (cs, _) = BS.unfoldrN (BS.length bs * 2) gen (Nothing, bs)
yield cs
blankedXmlToBalancedParens2
Nothing -> return ()
where gen :: (Maybe Bool, ByteString) -> Maybe (Word8, (Maybe Bool, ByteString))
gen (Just True , bs) = Just (0xFF, (Nothing, bs))
gen (Just False , bs) = Just (0x00, (Nothing, bs))
gen (Nothing , bs) = case BS.uncons bs of
Just (c, cs) -> case balancedParensOf c of
MiniN -> gen (Nothing , cs)
MiniT -> Just (0xFF, (Nothing , cs))
MiniF -> Just (0x00, (Nothing , cs))
MiniTF -> Just (0xFF, (Just False , cs))
Nothing -> Nothing
data MiniBP = MiniN | MiniT | MiniF | MiniTF
balancedParensOf :: Word8 -> MiniBP
balancedParensOf c = case c of
d | d == _less -> MiniT
d | d == _greater -> MiniF
d | d == _bracketleft -> MiniT
d | d == _bracketright -> MiniF
d | d == _parenleft -> MiniT
d | d == _parenright -> MiniF
d | d == _t -> MiniTF
d | d == _a -> MiniTF
d | d == _v -> MiniTF
_ -> MiniN
yieldBitsOfWord8 :: Monad m => Word8 -> Conduit BS.ByteString m Bool
yieldBitsOfWord8 w = do
yield ((w .&. BITS.bit 0) /= 0)
yield ((w .&. BITS.bit 1) /= 0)
yield ((w .&. BITS.bit 2) /= 0)
yield ((w .&. BITS.bit 3) /= 0)
yield ((w .&. BITS.bit 4) /= 0)
yield ((w .&. BITS.bit 5) /= 0)
yield ((w .&. BITS.bit 6) /= 0)
yield ((w .&. BITS.bit 7) /= 0)
yieldBitsofWord8s :: Monad m => [Word8] -> Conduit BS.ByteString m Bool
yieldBitsofWord8s = P.foldr ((>>) . yieldBitsOfWord8) (return ())
byteStringToBits :: Monad m => Conduit BS.ByteString m Bool
byteStringToBits = do
mbs <- await
case mbs of
Just bs -> yieldBitsofWord8s (BS.unpack bs) >> byteStringToBits
Nothing -> return ()