{-# LANGUAGE OverloadedStrings #-}
module Codec.Compression.ShannonFano.Internal
( Input,
Table,
split,
chunksOf,
decode,
compressChunk,
compressWithLeftover,
decompressWithLeftover,
)
where
import Control.Arrow ((&&&))
import Data.Bits
import Data.Bool (bool)
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as BSL
import Data.Word
type Table a = [(Word8, a)]
type Input = ByteString
split :: Table Float -> (Table Float, Table Float)
split t = aux t []
where
aux [] l = (l, [])
aux (x : xs) l
| snd x + sum (map snd l) < sum (map snd xs) = aux xs (l ++ [x])
| otherwise = (l ++ [x], xs)
compressChunk :: ByteString -> Word8
compressChunk s = aux s zeroBits
where
aux :: ByteString -> Word8 -> Word8
aux s w
| BSL.null s = w
| otherwise =
let (h, (t, n)) = (BSL.head &&& BSL.tail &&& BSL.length) s
in case h of
49 -> aux t (setBit w (fromEnum n - 1))
48 -> aux t w
chunksOf :: Int -> ByteString -> [ByteString]
chunksOf n = go
where
go t = case BSL.splitAt (toEnum n) t of
(a, b)
| BSL.null a -> []
| otherwise -> a : go b
compress :: ByteString -> ByteString
compress = BSL.pack . map compressChunk . chunksOf 8
compressWithLeftover :: ByteString -> ByteString
compressWithLeftover s = BSL.append (int2compressedBS (fromEnum (BSL.length s) `mod` 8)) (compress s)
where
int2compressedBS :: Int -> ByteString
int2compressedBS n
| n > 128 = error "excess length greater than 8"
| otherwise = compress . bool2BS . bitList $ n
decode :: ByteString -> ByteString
decode = BSL.concatMap (bool2BS . bitList)
decompressWithLeftover :: ByteString -> ByteString
decompressWithLeftover = BSL.concat . aux . (tail &&& (fromEnum . compressChunk . head)) . chunksOf 8 . decode
where
aux :: ([ByteString], Int) -> [ByteString]
aux ([], _) = []
aux ([x], 0) = [x]
aux ([x], i) = [BSL.drop (toEnum (8 - i)) x]
aux (x : xs, i) = x : aux (xs, i)
bitList :: Bits a => a -> [Bool]
bitList x = map (testBit x) [7, 6 .. 0]
bool2BS :: [Bool] -> ByteString
bool2BS = BSL.concat . map (bool "0" "1")