{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ParallelListComp #-}
{-# LANGUAGE CPP #-}
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Generator.LZ78
-- Copyright   :  (c) Edward Kmett 2009-2012
-- License     :  BSD-style
-- Maintainer  :  ekmett@gmail.com
-- Stability   :  experimental
-- Portability :  non-portable (type families)
--
-- Compression algorithms are all about exploiting redundancy. When applying
-- an expensive 'Reducer' to a redundant source, it may be better to
-- extract the structural redundancy that is present. 'LZ78' is a compression
-- algorithm that does so, without requiring the dictionary to be populated
-- with all of the possible values of a data type unlike its later
-- refinement LZW, and which has fewer comparison reqirements during encoding
-- than its earlier counterpart LZ77.
-----------------------------------------------------------------------------

module Data.Compressed.Internal.LZ78
    (
    -- * Lempel-Ziv 78
      Token(..)
    , LZ78(..)
    -- * Encoding
    , encode    -- /O(n)/
    , encodeOrd -- /O(n log n)/
    , encodeEq  -- /O(n^2)/
    -- * Decoding (reduce)
    , decode
    -- * Recoding
    , recode    -- /O(n)/
    , recodeOrd -- /O(n log n)/
    , recodeEq  -- /O(n^2)/
    -- * Unsafe (exposes internal structure)
    , Entry(..)
    , entries
    ) where

#if __GLASGOW_HASKELL__ < 710
import Control.Applicative
import Data.Traversable
#endif
import Control.Monad.Zip
import qualified Data.Sequence as Seq
import Data.Sequence ((|>))
import qualified Data.Map as Map
import qualified Data.HashMap.Lazy as HashMap
import qualified Data.List as List
import Data.Functor.Extend
import Data.Generator
import Data.Function (on)
import Data.Key as Key
import Data.Foldable
import Data.Semigroup
import Data.Pointed
import Text.Read
import Control.Comonad
import Data.Hashable
import Data.Semigroup.Reducer (Reducer(..), Count(..))

data Token a = Token {-# UNPACK #-} !Int a deriving (Eq, Ord)

instance Functor Token where
  fmap f (Token i a) = Token i (f a)

instance Foldable Token where
  foldMap f (Token _ a) = f a

instance Traversable Token where
  traverse f (Token i a) = Token i <$> f a

instance Extend Token where
  extended = extend

instance Comonad Token where
  extend f t@(Token i _) = Token i (f t)
  duplicate t@(Token i _) = Token i t
  extract (Token _ a) = a

instance Hashable a => Hashable (Token a) where
  hashWithSalt s (Token i a) = s `hashWithSalt` i `hashWithSalt` a

-- | An LZ78 compressed 'Generator'.
data LZ78 a
  = Cons {-# UNPACK #-} !(Token a) (LZ78 a)
  | Nil

instance Show a => Show (LZ78 a) where
  showsPrec d xs = showParen (d > 10) $
    showString "encode " . showsPrec 11 (toList xs)

instance Eq a => Eq (LZ78 a) where
  (==) = (==) `on` decode

instance Ord a => Ord (LZ78 a) where
  compare = compare `on` decode

instance (Read a, Hashable a, Eq a) => Read (LZ78 a) where
  readPrec = parens $ prec 10 $ do
    Ident "encode" <- lexP
    encode <$> step readPrec

instance Generator (LZ78 a) where
  type Elem (LZ78 a) = a
  mapTo = go (Seq.singleton mempty) where
    go _ _ m Nil = m
    go s f m (Cons (Token w c) ws) = m `mappend` go (s |> v) f v ws where
      v = Seq.index s w `mappend`  unit (f c)

instance Functor LZ78 where
  fmap f (Cons (Token i a) as) = Cons (Token i (f a)) (fmap f as)
  fmap _ Nil = Nil
  a <$ xs = go 0 (getCount (reduce xs)) where
     go !_ 0 = Nil
     go k  n | n > k = Cons (Token k a) (go (k + 1) (n - k - 1))
             | otherwise = Cons (Token (n - 1) a) Nil

instance Pointed LZ78 where
  point a = Cons (Token 0 a) Nil

instance Foldable LZ78 where
  foldMap f = unwrapMonoid . mapReduce f
  fold      = unwrapMonoid . reduce

-- | /O(n)/ Construct an LZ78-compressed 'Generator' using a 'HashMap' internally.
encode :: (Hashable a, Eq a) => [a] -> LZ78 a
encode = go HashMap.empty 1 0 where
  go _ _ _ [] = Nil
  go _ _ p [c] = Cons (Token p c) Nil
  go d f p (c:cs) = let t = Token p c in case HashMap.lookup t d of
    Just p' -> go d f p' cs
    Nothing -> Cons t (go (HashMap.insert t f d) (succ f) 0 cs)

-- | /O(n log n)/ Contruct an LZ78-compressed 'Generator' using a 'Map' internally.
encodeOrd :: Ord a => [a] -> LZ78 a
encodeOrd = go Map.empty 1 0 where
  go _ _ _ [] = Nil
  go _ _ p [c] = Cons (Token p c) Nil
  go d f p (c:cs) = let t = Token p c in case Map.lookup t d of
    Just p' -> go d f p' cs
    Nothing -> Cons t (go (Map.insert t f d) (succ f) 0 cs)

-- | /O(n^2)/ Contruct an LZ78-compressed 'Generator' using a list internally, requires an instance of Eq,
-- less efficient than encode.
encodeEq :: Eq a => [a] -> LZ78 a
encodeEq = go [] 1 0 where
  go _ _ _ [] = Nil
  go _ _ p [c] = Cons (Token p c) Nil
  go d f p (c:cs) = let t = Token p c in case List.lookup t d of
    Just p' -> go d f p' cs
    Nothing -> Cons t (go ((t, f):d) (succ f) 0 cs)

-- | A type-constrained 'reduce' operation
decode :: LZ78 a -> [a]
decode = reduce

-- | /O(n)/. Recompress with 'Hashable'
recode :: (Eq a, Hashable a) => LZ78 a -> LZ78 a
recode = encode . decode

-- | /O(n log n)/. Recompress with 'Ord'
recodeOrd :: Ord a => LZ78 a -> LZ78 a
recodeOrd = encodeOrd . decode

-- | /O(n^2)/. Recompress with 'Eq'
recodeEq :: Eq a => LZ78 a -> LZ78 a
recodeEq = encodeEq . decode

data Entry i a = Entry !i a deriving (Show,Read)

instance Functor (Entry i) where
  fmap f (Entry i a) = Entry i (f a)

instance Extend (Entry i) where
  extended = extend

instance Comonad (Entry i) where
  extend f e@(Entry i _) = Entry i (f e)
  duplicate e@(Entry i _) = Entry i e
  extract (Entry _ a) = a

instance Eq i => Eq (Entry i a) where
  Entry i _ == Entry j _ = i == j

instance Ord i => Ord (Entry i a) where
  compare (Entry i _) (Entry j _) = compare i j

instance Hashable i => Hashable (Entry i a) where
  hashWithSalt n (Entry i _) = hashWithSalt n i

-- | exposes internal structure
entries :: LZ78 a -> LZ78 (Entry Int a)
entries = go 0 where
  go k (Cons (Token i t) xs) = Cons (Token i (Entry k t)) $ (go $! k + 1) xs
  go _ Nil = Nil

instance Applicative LZ78 where
  pure a = Cons (Token 0 a) Nil
  fs <*> as = fmap extract $ encode $ do
    Entry i f <- decode (entries fs)
    Entry j a <- decode (entries as)
    return $ Entry (i,j) (f a)
  as *> bs = fmap extract $ encode $ Prelude.concat $ replicate (reduceWith getCount as)  $  decode (entries bs)
  as <* bs = fmap extract $ encode $ Prelude.concat $ replicate (reduceWith getCount bs) <$> decode (entries as)

instance Monad LZ78 where
  return a = Cons (Token 0 a) Nil
  (>>) = (*>)
  as >>= k = fmap extract $ encode $ do
    Entry i a <- decode (entries as)
    Entry j b <- decode (entries (k a))
    return $ Entry (i,j) b

instance MonadZip LZ78 where
  mzipWith = Key.zipWith
  munzip as = (fmap fst as, fmap snd as)

instance Adjustable LZ78 where
  adjust f i = fmap extract . encode . adjust (Entry (-1) . f . extract) i . decode . entries

type instance Key LZ78 = Int

instance Lookup LZ78 where
  lookup i xs = Key.lookup i (decode xs)

instance Indexable LZ78 where
  index xs i = index (decode xs) i

instance FoldableWithKey LZ78 where
  foldMapWithKey f xs = foldMapWithKey f (decode xs)

instance Zip LZ78 where
  zipWith f as bs = extract <$> encode
    [ Entry (i,j) (f a b)
    | Entry i a <- decode (entries as)
    | Entry j b <- decode (entries bs)
    ]