{-# LANGUAGE CPP, RankNTypes, DeriveDataTypeable, BangPatterns #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
module Codec.Compression.Zlib.Internal (
compress,
decompress,
CompressStream(..),
compressST,
compressIO,
foldCompressStream,
foldCompressStreamWithInput,
DecompressStream(..),
DecompressError(..),
decompressST,
decompressIO,
foldDecompressStream,
foldDecompressStreamWithInput,
CompressParams(..),
defaultCompressParams,
DecompressParams(..),
defaultDecompressParams,
Stream.Format(..),
Stream.gzipFormat,
Stream.zlibFormat,
Stream.rawFormat,
Stream.gzipOrZlibFormat,
Stream.CompressionLevel(..),
Stream.defaultCompression,
Stream.noCompression,
Stream.bestSpeed,
Stream.bestCompression,
Stream.compressionLevel,
Stream.Method(..),
Stream.deflateMethod,
Stream.WindowBits(..),
Stream.defaultWindowBits,
Stream.windowBits,
Stream.MemoryLevel(..),
Stream.defaultMemoryLevel,
Stream.minMemoryLevel,
Stream.maxMemoryLevel,
Stream.memoryLevel,
Stream.CompressionStrategy(..),
Stream.defaultStrategy,
Stream.filteredStrategy,
Stream.huffmanOnlyStrategy,
) where
import Prelude hiding (length)
import Control.Monad (when)
import Control.Exception (Exception, throw, assert)
import Control.Monad.ST.Lazy hiding (stToIO)
import Control.Monad.ST.Strict (stToIO)
#if __GLASGOW_HASKELL__ >= 702
import qualified Control.Monad.ST.Unsafe as Unsafe (unsafeIOToST)
#else
import qualified Control.Monad.ST.Strict as Unsafe (unsafeIOToST)
#endif
import Data.Typeable (Typeable)
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as L
import qualified Data.ByteString as S
import qualified Data.ByteString.Internal as S
import Data.Word (Word8)
import GHC.IO (noDuplicate)
import qualified Codec.Compression.Zlib.Stream as Stream
import Codec.Compression.Zlib.Stream (Stream)
data CompressParams = CompressParams {
compressLevel :: !Stream.CompressionLevel,
compressMethod :: !Stream.Method,
compressWindowBits :: !Stream.WindowBits,
compressMemoryLevel :: !Stream.MemoryLevel,
compressStrategy :: !Stream.CompressionStrategy,
compressBufferSize :: !Int,
compressDictionary :: Maybe S.ByteString
} deriving Show
data DecompressParams = DecompressParams {
decompressWindowBits :: !Stream.WindowBits,
decompressBufferSize :: !Int,
decompressDictionary :: Maybe S.ByteString,
decompressAllMembers :: Bool
} deriving Show
defaultCompressParams :: CompressParams
defaultCompressParams = CompressParams {
compressLevel = Stream.defaultCompression,
compressMethod = Stream.deflateMethod,
compressWindowBits = Stream.defaultWindowBits,
compressMemoryLevel = Stream.defaultMemoryLevel,
compressStrategy = Stream.defaultStrategy,
compressBufferSize = defaultCompressBufferSize,
compressDictionary = Nothing
}
defaultDecompressParams :: DecompressParams
defaultDecompressParams = DecompressParams {
decompressWindowBits = Stream.defaultWindowBits,
decompressBufferSize = defaultDecompressBufferSize,
decompressDictionary = Nothing,
decompressAllMembers = True
}
defaultCompressBufferSize, defaultDecompressBufferSize :: Int
defaultCompressBufferSize = 16 * 1024 - L.chunkOverhead
defaultDecompressBufferSize = 32 * 1024 - L.chunkOverhead
data DecompressStream m =
DecompressInputRequired {
decompressSupplyInput :: S.ByteString -> m (DecompressStream m)
}
| DecompressOutputAvailable {
decompressOutput :: !S.ByteString,
decompressNext :: m (DecompressStream m)
}
| DecompressStreamEnd {
decompressUnconsumedInput :: S.ByteString
}
| DecompressStreamError {
decompressStreamError :: DecompressError
}
data DecompressError =
TruncatedInput
| DictionaryRequired
| DictionaryMismatch
| DataFormatError String
deriving (Eq, Typeable)
instance Show DecompressError where
show TruncatedInput = modprefix "premature end of compressed data stream"
show DictionaryRequired = modprefix "compressed data stream requires custom dictionary"
show DictionaryMismatch = modprefix "given dictionary does not match the expected one"
show (DataFormatError detail) = modprefix ("compressed data stream format error (" ++ detail ++ ")")
modprefix :: ShowS
modprefix = ("Codec.Compression.Zlib: " ++)
instance Exception DecompressError
foldDecompressStream :: Monad m
=> ((S.ByteString -> m a) -> m a)
-> (S.ByteString -> m a -> m a)
-> (S.ByteString -> m a)
-> (DecompressError -> m a)
-> DecompressStream m -> m a
foldDecompressStream input output end err = fold
where
fold (DecompressInputRequired next) =
input (\x -> next x >>= fold)
fold (DecompressOutputAvailable outchunk next) =
output outchunk (next >>= fold)
fold (DecompressStreamEnd inchunk) = end inchunk
fold (DecompressStreamError derr) = err derr
foldDecompressStreamWithInput :: (S.ByteString -> a -> a)
-> (L.ByteString -> a)
-> (DecompressError -> a)
-> (forall s. DecompressStream (ST s))
-> L.ByteString
-> a
foldDecompressStreamWithInput chunk end err = \s lbs ->
runST (fold s (L.toChunks lbs))
where
fold (DecompressInputRequired next) [] =
next S.empty >>= \strm -> fold strm []
fold (DecompressInputRequired next) (inchunk:inchunks) =
next inchunk >>= \s -> fold s inchunks
fold (DecompressOutputAvailable outchunk next) inchunks = do
r <- next >>= \s -> fold s inchunks
return $ chunk outchunk r
fold (DecompressStreamEnd inchunk) inchunks =
return $ end (L.fromChunks (inchunk:inchunks))
fold (DecompressStreamError derr) _ =
return $ err derr
data CompressStream m =
CompressInputRequired {
compressSupplyInput :: S.ByteString -> m (CompressStream m)
}
| CompressOutputAvailable {
compressOutput :: !S.ByteString,
compressNext :: m (CompressStream m)
}
| CompressStreamEnd
foldCompressStream :: Monad m
=> ((S.ByteString -> m a) -> m a)
-> (S.ByteString -> m a -> m a)
-> m a
-> CompressStream m -> m a
foldCompressStream input output end = fold
where
fold (CompressInputRequired next) =
input (\x -> next x >>= fold)
fold (CompressOutputAvailable outchunk next) =
output outchunk (next >>= fold)
fold CompressStreamEnd =
end
foldCompressStreamWithInput :: (S.ByteString -> a -> a)
-> a
-> (forall s. CompressStream (ST s))
-> L.ByteString
-> a
foldCompressStreamWithInput chunk end = \s lbs ->
runST (fold s (L.toChunks lbs))
where
fold (CompressInputRequired next) [] =
next S.empty >>= \strm -> fold strm []
fold (CompressInputRequired next) (inchunk:inchunks) =
next inchunk >>= \s -> fold s inchunks
fold (CompressOutputAvailable outchunk next) inchunks = do
r <- next >>= \s -> fold s inchunks
return $ chunk outchunk r
fold CompressStreamEnd _inchunks =
return end
compress :: Stream.Format -> CompressParams -> L.ByteString -> L.ByteString
compressST :: Stream.Format -> CompressParams -> CompressStream (ST s)
compressIO :: Stream.Format -> CompressParams -> CompressStream IO
compress format params = foldCompressStreamWithInput
L.Chunk L.Empty
(compressStreamST format params)
compressST format params = compressStreamST format params
compressIO format params = compressStreamIO format params
compressStream :: Stream.Format -> CompressParams -> S.ByteString
-> Stream (CompressStream Stream)
compressStream format (CompressParams compLevel method bits memLevel
strategy initChunkSize mdict) =
\chunk -> do
Stream.deflateInit format compLevel method bits memLevel strategy
setDictionary mdict
case chunk of
_ | S.null chunk ->
fillBuffers 20
S.PS inFPtr offset length -> do
Stream.pushInputBuffer inFPtr offset length
fillBuffers initChunkSize
where
fillBuffers :: Int -> Stream (CompressStream Stream)
fillBuffers outChunkSize = do
#ifdef DEBUG
Stream.consistencyCheck
#endif
inputBufferEmpty <- Stream.inputBufferEmpty
outputBufferFull <- Stream.outputBufferFull
assert (inputBufferEmpty || outputBufferFull) $ return ()
when outputBufferFull $ do
outFPtr <- Stream.unsafeLiftIO (S.mallocByteString outChunkSize)
Stream.pushOutputBuffer outFPtr 0 outChunkSize
if inputBufferEmpty
then return $ CompressInputRequired $ \chunk ->
case chunk of
_ | S.null chunk -> drainBuffers True
S.PS inFPtr offset length -> do
Stream.pushInputBuffer inFPtr offset length
drainBuffers False
else drainBuffers False
drainBuffers :: Bool -> Stream (CompressStream Stream)
drainBuffers lastChunk = do
inputBufferEmpty' <- Stream.inputBufferEmpty
outputBufferFull' <- Stream.outputBufferFull
assert(not outputBufferFull'
&& (lastChunk || not inputBufferEmpty')) $ return ()
let flush = if lastChunk then Stream.Finish else Stream.NoFlush
status <- Stream.deflate flush
case status of
Stream.Ok -> do
outputBufferFull <- Stream.outputBufferFull
if outputBufferFull
then do (outFPtr, offset, length) <- Stream.popOutputBuffer
let chunk = S.PS outFPtr offset length
return $ CompressOutputAvailable chunk $ do
fillBuffers defaultCompressBufferSize
else do fillBuffers defaultCompressBufferSize
Stream.StreamEnd -> do
inputBufferEmpty <- Stream.inputBufferEmpty
assert inputBufferEmpty $ return ()
outputBufferBytesAvailable <- Stream.outputBufferBytesAvailable
if outputBufferBytesAvailable > 0
then do (outFPtr, offset, length) <- Stream.popOutputBuffer
let chunk = S.PS outFPtr offset length
Stream.finalise
return $ CompressOutputAvailable chunk (return CompressStreamEnd)
else do Stream.finalise
return CompressStreamEnd
Stream.Error code msg -> case code of
Stream.BufferError -> fail "BufferError should be impossible!"
Stream.NeedDict _ -> fail "NeedDict is impossible!"
_ -> fail msg
setDictionary :: Maybe S.ByteString -> Stream ()
setDictionary (Just dict)
| Stream.formatSupportsDictionary format = do
status <- Stream.deflateSetDictionary dict
case status of
Stream.Ok -> return ()
Stream.Error _ msg -> fail msg
_ -> fail "error when setting deflate dictionary"
setDictionary _ = return ()
decompress :: Stream.Format -> DecompressParams -> L.ByteString -> L.ByteString
decompressST :: Stream.Format -> DecompressParams -> DecompressStream (ST s)
decompressIO :: Stream.Format -> DecompressParams -> DecompressStream IO
decompress format params = foldDecompressStreamWithInput
L.Chunk (const L.Empty) throw
(decompressStreamST format params)
decompressST format params = decompressStreamST format params
decompressIO format params = decompressStreamIO format params
decompressStream :: Stream.Format -> DecompressParams
-> Bool -> S.ByteString
-> Stream (DecompressStream Stream)
decompressStream format (DecompressParams bits initChunkSize mdict allMembers)
resume =
\chunk -> do
inputBufferEmpty <- Stream.inputBufferEmpty
outputBufferFull <- Stream.outputBufferFull
assert inputBufferEmpty $
if resume then assert (format == Stream.gzipFormat && allMembers) $
Stream.inflateReset
else assert outputBufferFull $
Stream.inflateInit format bits
case chunk of
_ | S.null chunk -> do
when outputBufferFull $ do
let outChunkSize = 1
outFPtr <- Stream.unsafeLiftIO (S.mallocByteString outChunkSize)
Stream.pushOutputBuffer outFPtr 0 outChunkSize
drainBuffers True
S.PS inFPtr offset length -> do
Stream.pushInputBuffer inFPtr offset length
assert (if not resume then outputBufferFull else True) $ return ()
if outputBufferFull
then fillBuffers initChunkSize
else drainBuffers False
where
fillBuffers :: Int
-> Stream (DecompressStream Stream)
fillBuffers outChunkSize = do
#ifdef DEBUG
Stream.consistencyCheck
#endif
inputBufferEmpty <- Stream.inputBufferEmpty
outputBufferFull <- Stream.outputBufferFull
assert (inputBufferEmpty || outputBufferFull) $ return ()
when outputBufferFull $ do
outFPtr <- Stream.unsafeLiftIO (S.mallocByteString outChunkSize)
Stream.pushOutputBuffer outFPtr 0 outChunkSize
if inputBufferEmpty
then return $ DecompressInputRequired $ \chunk ->
case chunk of
_ | S.null chunk -> drainBuffers True
S.PS inFPtr offset length -> do
Stream.pushInputBuffer inFPtr offset length
drainBuffers False
else drainBuffers False
drainBuffers :: Bool -> Stream (DecompressStream Stream)
drainBuffers lastChunk = do
inputBufferEmpty' <- Stream.inputBufferEmpty
outputBufferFull' <- Stream.outputBufferFull
assert(not outputBufferFull'
&& (lastChunk || not inputBufferEmpty')) $ return ()
status <- Stream.inflate Stream.NoFlush
case status of
Stream.Ok -> do
outputBufferFull <- Stream.outputBufferFull
if outputBufferFull
then do (outFPtr, offset, length) <- Stream.popOutputBuffer
let chunk = S.PS outFPtr offset length
return $ DecompressOutputAvailable chunk $ do
fillBuffers defaultDecompressBufferSize
else do fillBuffers defaultDecompressBufferSize
Stream.StreamEnd -> do
inputBufferEmpty <- Stream.inputBufferEmpty
if inputBufferEmpty
then do finish (DecompressStreamEnd S.empty)
else do (inFPtr, offset, length) <- Stream.popRemainingInputBuffer
let inchunk = S.PS inFPtr offset length
finish (DecompressStreamEnd inchunk)
Stream.Error code msg -> case code of
Stream.BufferError -> finish (DecompressStreamError TruncatedInput)
Stream.NeedDict adler -> do
err <- setDictionary adler mdict
case err of
Just streamErr -> finish streamErr
Nothing -> drainBuffers lastChunk
Stream.DataError -> finish (DecompressStreamError (DataFormatError msg))
_ -> fail msg
finish end = do
outputBufferBytesAvailable <- Stream.outputBufferBytesAvailable
if outputBufferBytesAvailable > 0
then do (outFPtr, offset, length) <- Stream.popOutputBuffer
return (DecompressOutputAvailable (S.PS outFPtr offset length) (return end))
else return end
setDictionary :: Stream.DictionaryHash -> Maybe S.ByteString
-> Stream (Maybe (DecompressStream Stream))
setDictionary _adler Nothing =
return $ Just (DecompressStreamError DictionaryRequired)
setDictionary _adler (Just dict) = do
status <- Stream.inflateSetDictionary dict
case status of
Stream.Ok -> return Nothing
Stream.Error Stream.DataError _ ->
return $ Just (DecompressStreamError DictionaryMismatch)
_ -> fail "error when setting inflate dictionary"
mkStateST :: ST s (Stream.State s)
mkStateIO :: IO (Stream.State RealWorld)
mkStateST = strictToLazyST Stream.mkState
mkStateIO = stToIO Stream.mkState
runStreamST :: Stream a -> Stream.State s -> ST s (a, Stream.State s)
runStreamIO :: Stream a -> Stream.State RealWorld -> IO (a, Stream.State RealWorld)
runStreamST strm zstate = strictToLazyST (Unsafe.unsafeIOToST noDuplicate >> Stream.runStream strm zstate)
runStreamIO strm zstate = stToIO (Stream.runStream strm zstate)
compressStreamIO :: Stream.Format -> CompressParams -> CompressStream IO
compressStreamIO format params =
CompressInputRequired {
compressSupplyInput = \chunk -> do
zstate <- mkStateIO
let next = compressStream format params
(strm', zstate') <- runStreamIO (next chunk) zstate
return (go strm' zstate')
}
where
go :: CompressStream Stream -> Stream.State RealWorld -> CompressStream IO
go (CompressInputRequired next) zstate =
CompressInputRequired {
compressSupplyInput = \chunk -> do
(strm', zstate') <- runStreamIO (next chunk) zstate
return (go strm' zstate')
}
go (CompressOutputAvailable chunk next) zstate =
CompressOutputAvailable chunk $ do
(strm', zstate') <- runStreamIO next zstate
return (go strm' zstate')
go CompressStreamEnd _ = CompressStreamEnd
compressStreamST :: Stream.Format -> CompressParams -> CompressStream (ST s)
compressStreamST format params =
CompressInputRequired {
compressSupplyInput = \chunk -> do
zstate <- mkStateST
let next = compressStream format params
(strm', zstate') <- runStreamST (next chunk) zstate
return (go strm' zstate')
}
where
go :: CompressStream Stream -> Stream.State s -> CompressStream (ST s)
go (CompressInputRequired next) zstate =
CompressInputRequired {
compressSupplyInput = \chunk -> do
(strm', zstate') <- runStreamST (next chunk) zstate
return (go strm' zstate')
}
go (CompressOutputAvailable chunk next) zstate =
CompressOutputAvailable chunk $ do
(strm', zstate') <- runStreamST next zstate
return (go strm' zstate')
go CompressStreamEnd _ = CompressStreamEnd
decompressStreamIO :: Stream.Format -> DecompressParams -> DecompressStream IO
decompressStreamIO format params =
DecompressInputRequired $ \chunk -> do
zstate <- mkStateIO
let next = decompressStream format params False
(strm', zstate') <- runStreamIO (next chunk) zstate
go strm' zstate' (S.null chunk)
where
go :: DecompressStream Stream -> Stream.State RealWorld -> Bool
-> IO (DecompressStream IO)
go (DecompressInputRequired next) zstate !_ =
return $ DecompressInputRequired $ \chunk -> do
(strm', zstate') <- runStreamIO (next chunk) zstate
go strm' zstate' (S.null chunk)
go (DecompressOutputAvailable chunk next) zstate !eof =
return $ DecompressOutputAvailable chunk $ do
(strm', zstate') <- runStreamIO next zstate
go strm' zstate' eof
go (DecompressStreamEnd unconsumed) zstate !eof
| format == Stream.gzipFormat
, decompressAllMembers params
, not eof = tryFollowingStream unconsumed zstate
| otherwise = finaliseStreamEnd unconsumed zstate
go (DecompressStreamError err) zstate !_ = finaliseStreamError err zstate
tryFollowingStream :: S.ByteString -> Stream.State RealWorld -> IO (DecompressStream IO)
tryFollowingStream chunk zstate = case S.length chunk of
0 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
0 -> finaliseStreamEnd S.empty zstate
1 | S.head chunk' /= 0x1f
-> finaliseStreamEnd chunk' zstate
1 -> return $ DecompressInputRequired $ \chunk'' -> case S.length chunk'' of
0 -> finaliseStreamEnd chunk' zstate
_ -> checkHeaderSplit (S.head chunk') chunk'' zstate
_ -> checkHeader chunk' zstate
1 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
0 -> finaliseStreamEnd chunk zstate
_ -> checkHeaderSplit (S.head chunk) chunk' zstate
_ -> checkHeader chunk zstate
checkHeaderSplit :: Word8 -> S.ByteString -> Stream.State RealWorld -> IO (DecompressStream IO)
checkHeaderSplit 0x1f chunk zstate
| S.head chunk == 0x8b = do
let resume = decompressStream format params True (S.pack [0x1f, 0x8b])
if S.length chunk > 1
then do
(DecompressInputRequired next, zstate') <- runStreamIO resume zstate
(strm', zstate'') <- runStreamIO (next (S.tail chunk)) zstate'
go strm' zstate'' False
else do
(strm, zstate') <- runStreamIO resume zstate
go strm zstate' False
checkHeaderSplit byte chunk zstate =
finaliseStreamEnd (S.cons byte chunk) zstate
checkHeader :: S.ByteString -> Stream.State RealWorld -> IO (DecompressStream IO)
checkHeader chunk zstate
| S.index chunk 0 == 0x1f
, S.index chunk 1 == 0x8b = do
let resume = decompressStream format params True chunk
(strm', zstate') <- runStreamIO resume zstate
go strm' zstate' False
checkHeader chunk zstate = finaliseStreamEnd chunk zstate
finaliseStreamEnd unconsumed zstate = do
_ <- runStreamIO Stream.finalise zstate
return (DecompressStreamEnd unconsumed)
finaliseStreamError err zstate = do
_ <- runStreamIO Stream.finalise zstate
return (DecompressStreamError err)
decompressStreamST :: Stream.Format -> DecompressParams -> DecompressStream (ST s)
decompressStreamST format params =
DecompressInputRequired $ \chunk -> do
zstate <- mkStateST
let next = decompressStream format params False
(strm', zstate') <- runStreamST (next chunk) zstate
go strm' zstate' (S.null chunk)
where
go :: DecompressStream Stream -> Stream.State s -> Bool
-> ST s (DecompressStream (ST s))
go (DecompressInputRequired next) zstate !_ =
return $ DecompressInputRequired $ \chunk -> do
(strm', zstate') <- runStreamST (next chunk) zstate
go strm' zstate' (S.null chunk)
go (DecompressOutputAvailable chunk next) zstate !eof =
return $ DecompressOutputAvailable chunk $ do
(strm', zstate') <- runStreamST next zstate
go strm' zstate' eof
go (DecompressStreamEnd unconsumed) zstate !eof
| format == Stream.gzipFormat
, decompressAllMembers params
, not eof = tryFollowingStream unconsumed zstate
| otherwise = finaliseStreamEnd unconsumed zstate
go (DecompressStreamError err) zstate !_ = finaliseStreamError err zstate
tryFollowingStream :: S.ByteString -> Stream.State s -> ST s (DecompressStream (ST s))
tryFollowingStream chunk zstate =
case S.length chunk of
0 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
0 -> finaliseStreamEnd S.empty zstate
1 | S.head chunk' /= 0x1f
-> finaliseStreamEnd chunk' zstate
1 -> return $ DecompressInputRequired $ \chunk'' -> case S.length chunk'' of
0 -> finaliseStreamEnd chunk' zstate
_ -> checkHeaderSplit (S.head chunk') chunk'' zstate
_ -> checkHeader chunk' zstate
1 -> return $ DecompressInputRequired $ \chunk' -> case S.length chunk' of
0 -> finaliseStreamEnd chunk zstate
_ -> checkHeaderSplit (S.head chunk) chunk' zstate
_ -> checkHeader chunk zstate
checkHeaderSplit :: Word8 -> S.ByteString -> Stream.State s -> ST s (DecompressStream (ST s))
checkHeaderSplit 0x1f chunk zstate
| S.head chunk == 0x8b = do
let resume = decompressStream format params True (S.pack [0x1f, 0x8b])
if S.length chunk > 1
then do
(DecompressInputRequired next, zstate') <- runStreamST resume zstate
(strm', zstate'') <- runStreamST (next (S.tail chunk)) zstate'
go strm' zstate'' False
else do
(strm, zstate') <- runStreamST resume zstate
go strm zstate' False
checkHeaderSplit byte chunk zstate =
finaliseStreamEnd (S.cons byte chunk) zstate
checkHeader :: S.ByteString -> Stream.State s -> ST s (DecompressStream (ST s))
checkHeader chunk zstate
| S.index chunk 0 == 0x1f
, S.index chunk 1 == 0x8b = do
let resume = decompressStream format params True chunk
(strm', zstate') <- runStreamST resume zstate
go strm' zstate' False
checkHeader chunk zstate = finaliseStreamEnd chunk zstate
finaliseStreamEnd unconsumed zstate = do
_ <- runStreamST Stream.finalise zstate
return (DecompressStreamEnd unconsumed)
finaliseStreamError err zstate = do
_ <- runStreamST Stream.finalise zstate
return (DecompressStreamError err)