{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Data.Git.Internal.Pack where
import Codec.Compression.Zlib.Internal hiding (Raw)
import Control.Applicative
import Control.Concurrent.MVar
import Control.Exception
import Control.Monad
import Control.Monad.Fail
import Control.Monad.Reader
import Control.Monad.RWS
import Control.Monad.ST
import Data.Attoparsec.ByteString as A
import Data.Attoparsec.Combinator (lookAhead)
import Data.Bits
import qualified Data.ByteString as B
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Lazy as BL
import Data.Digest.CRC32
import Data.Map (Map)
import qualified Data.Map as M
import Data.STRef
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed as UV
import qualified Data.Vector.Unboxed.Mutable as UM
import Data.Word
import System.IO (SeekMode (..), hSeek, hTell)
import System.Posix.FilePath
import Data.Git.Hash
import Data.Git.Internal.FileUtil
import Data.Git.Internal.Object (parseBlob, parseCommit, parseTag, parseTree)
import Data.Git.Internal.Parsers
import Data.Git.Internal.Types (PackFile (..), PackIndex (..), Crc32)
import Data.Git.Object
data PackObject = PackObject ObjectType
| DeltaOff Int
| DeltaRef Sha1
deriving (Eq, Ord, Show)
data Raw = Raw {
rawType :: !PackObject
, rawData :: !BL.ByteString
} deriving (Eq, Ord, Show)
parsePackedRaw' :: Parser Raw
parsePackedRaw' = do (ty, sz) <- parseCompactHeader
rest <- lookAhead takeLazyByteString
let (_, dat) = decompressTo sz rest
return $ Raw ty dat
base128le :: Parser Word
base128le = do b <- fromIntegral <$> anyWord8
if b `testBit` 7
then base128le >>= return . \n -> n `unsafeShiftL` 7 .|. (0x7f .&. b)
else return b
parseCompactHeader :: Parser (PackObject, Word)
parseCompactHeader = do h <- anyWord8
sz <- if h `testBit` 7 then base128le else return 0
ty <- parseType $ 0x70 .&. h
return (ty, (sz `unsafeShiftL` 4) .|. (0x0f .&. fromIntegral h))
where parseType 0x10 = pure $ PackObject CommitType
parseType 0x20 = pure $ PackObject TreeType
parseType 0x30 = pure $ PackObject BlobType
parseType 0x40 = pure $ PackObject TagType
parseType 0x60 = DeltaOff <$> parseOffset
parseType 0x70 = DeltaRef . Sha1 <$> A.take 20
parseType n = error $ "no type for " ++ show (n `shiftR` 4)
parseOffset :: Parser Int
parseOffset = do byte <- anyWord8
let number = byte .&. 0x7f
loop byte (fromIntegral number)
where
loop byte number | byte `testBit` 7 = do
byte' <- anyWord8
loop byte' $ (number + 1) `unsafeShiftL` 7 .|. (fromIntegral byte' .&. 0x7f)
loop _ number | otherwise = return number
decompressTo :: Word -> BL.ByteString -> (BL.ByteString, BL.ByteString)
decompressTo sz = fmap BL.fromStrict . foldDecompressStreamWithInput ((<>) . ("",)) (,"") throw go
where go = decompressST zlibFormat defaultDecompressParams { decompressBufferSize = fromIntegral sz }
type Patch a = RWST BL.ByteString BB.Builder Word64 Parser a
parsePatch :: Patch ()
parsePatch = do op <- lift anyWord8
b <- if op `testBit` 7
then copy op
else BL.fromStrict <$> lift (A.take $ fromIntegral op)
tell . BB.lazyByteString $ b
where copy op = do put 0
when (op `testBit` 0) $ orShift 0
when (op `testBit` 1) $ orShift 8
when (op `testBit` 2) $ orShift 16
when (op `testBit` 3) $ orShift 24
cp_off <- fromIntegral <$> get
put 0
when (op `testBit` 4) $ orShift 0
when (op `testBit` 5) $ orShift 8
when (op `testBit` 6) $ orShift 16
gets (==0) >>= flip when (put 0x10000)
cp_size <- fromIntegral <$> get
asks $ BL.take cp_size . BL.drop cp_off
orShift n = do byte <- lift anyWord8
modify ((fromIntegral byte `unsafeShiftL` n) .|.)
applyPatch :: BL.ByteString -> B.ByteString -> Either String BL.ByteString
applyPatch base = fmap (BB.toLazyByteString . snd) . parseOnly (execRWST go base 0)
where go = lift base128le >> lift base128le >> many parsePatch
resolveDelta :: Int -> PackFile -> Raw -> Maybe Raw
resolveDelta off pf@(PackFile pb _) (Raw (DeltaOff o) b) = do
lu <- parseMaybe parsePackedRaw' $ BL.drop (fromIntegral $ off - o) pb
mr <- resolveDelta (off - o) pf lu
let patched = either error id (applyPatch (rawData mr) $ BL.toStrict b)
return $ mr { rawData = patched }
resolveDelta _ pf@(PackFile pb ind) (Raw (DeltaRef s) b) = do
o <- getShaOffset ind s
lu <- parseMaybe parsePackedRaw' $ BL.drop (fromIntegral o) pb
mr <- resolveDelta (fromIntegral o) pf lu
let patched = either error id (applyPatch (rawData mr) $ BL.toStrict b)
return $ mr { rawData = patched }
resolveDelta _ _ r = return r
parseFanout :: Parser (UV.Vector Word32)
parseFanout = UV.replicateM 256 word32
parseIndexShas :: Word32 -> Parser (V.Vector Sha1)
parseIndexShas n = V.replicateM (fromIntegral n) (A.take 20 >>= return . Sha1)
parseShaCrcs :: Word32 -> Parser (UV.Vector Crc32)
parseShaCrcs n = UV.replicateM (fromIntegral n) word32
parseShaOffsets :: Word32 -> Parser (UV.Vector Word32)
parseShaOffsets n = UV.replicateM (fromIntegral n) word32
parseBigOffsets :: UV.Vector Word32 -> Parser (UV.Vector Word64)
parseBigOffsets os = UV.replicateM (fromIntegral n) word64
where n = UV.foldl' (\a b -> a + (b `unsafeShiftL` 63)) 0 os
parsePackIndex :: Parser PackIndex
parsePackIndex = do void "\255tOc"
2 <- word32
fo <- parseFanout
let size = fo UV.! 255
ss <- parseIndexShas size
cs <- parseShaCrcs size
os <- parseShaOffsets size
bs <- parseBigOffsets os
return $ PackIndex fo ss cs os bs
getShaOffset :: PackIndex -> Sha1 -> Maybe Word64
getShaOffset pidx s = offset'
where fb, ub, lb :: Int
fb = fromIntegral . B.head . getSha1 $ s
ub = fromIntegral $ (fanout pidx) UV.! fb
lb | fb == 0 = 0
| otherwise = fromIntegral $ (fanout pidx) UV.! pred fb
offset = ((shaOffsets pidx) UV.!) . (lb+) <$>
V.elemIndex s (V.slice lb (ub - lb) (indexShas pidx))
offset' = case offset of
Nothing -> Nothing
Just off | off `testBit` 31 ->
Just ((shaBigOffsets pidx) UV.! fromIntegral (clearBit off 31))
| otherwise ->
Just $ fromIntegral off
readIndexFile :: RawFilePath -> IO PackIndex
readIndexFile p = (either error id . parseOnly parsePackIndex) <$> readRawFileS (p <.> "idx")
isPackIndex :: RawFilePath -> Bool
isPackIndex = (==".idx") . takeExtension
readPackFile :: RawFilePath -> IO PackFile
readPackFile p = PackFile <$> readRawFileL (p <.> "pack") <*> readIndexFile p
findPackSha :: PackFile -> Sha1 -> Maybe Object
findPackSha pf@(PackFile pb ix) s = do
offset <- getShaOffset ix s
raw <- parseMaybe parsePackedRaw' (BL.drop (fromIntegral offset) pb)
Raw (PackObject t) d <- resolveDelta (fromIntegral offset) pf raw
case t of
BlobType -> BlobObj <$> parseMaybe parseBlob ("blob 1234\NUL" <> d)
TreeType -> TreeObj <$> parseMaybe parseTree ("tree 1234\NUL" <> d)
CommitType -> CommitObj <$> parseMaybe parseCommit ("commit 1234\NUL" <> d)
TagType -> TagObj <$> parseMaybe parseTag ("tag 1234\NUL" <> d)
buildPackedObject :: Object -> Builder
buildPackedObject o = buildCompactHeader ty sz <> BB.lazyByteString (compress zlibFormat defaultCompressParams b)
where b = BB.toLazyByteString $ buildObject o
ty = compactTag o
sz = fromIntegral . BL.length $ b
compactTag :: Object -> Word8
compactTag CommitObj {} = 0x10
compactTag TreeObj {} = 0x20
compactTag BlobObj {} = 0x30
compactTag TagObj {} = 0x40
buildCompactHeader :: Word8 -> Word64 -> Builder
buildCompactHeader t sz = go (t .|. 0x0f .&. fromIntegral sz) (sz `unsafeShiftR` 4)
where go c 0 = BB.word8 c
go c n = BB.word8 (c .|. 0x80) <> go (fromIntegral $ 0x7f .&. n) (n `unsafeShiftR` 7)
data IndexData = IndexData
{ idxCrc :: !Word32
, idxOffset :: !Word64
} deriving (Eq, Ord, Show)
type PackIndexer = Map Sha1 IndexData
data PackingState = PackingState
{ psTempFile :: TempFile
, psCount :: Word32
, psIndexer :: PackIndexer
, psOffset :: Word64
}
makePackIndex :: PackIndexer -> PackIndex
makePackIndex idx | sz == 0 =
PackIndex (UV.replicate 256 0) mempty mempty mempty mempty
| otherwise = runST $ do
fan <- UM.replicate 256 0
shas <- VM.unsafeNew sz
crcs <- UM.unsafeNew sz
offs <- UM.unsafeNew sz
bos <- newSTRef (0, [])
forM_ (zip [0..] $ M.toAscList idx) $ \(i, (h@(Sha1 bs), IndexData crc off)) -> do
UM.unsafeWrite fan (fromIntegral $ B.head bs) (fromIntegral $ i + 1)
VM.unsafeWrite shas i h
UM.unsafeWrite crcs i crc
if off < bit 31
then UM.unsafeWrite offs i $ fromIntegral off
else do
(boc, bol) <- readSTRef bos
UM.unsafeWrite offs i (boc `setBit` 31)
writeSTRef bos (boc + 1, (boc, off) : bol)
(boc, bol) <- readSTRef bos
let fillFan lst fi = do
cur <- UM.unsafeRead fan fi
if cur < lst
then UM.unsafeWrite fan fi lst >> return lst
else return cur
foldM_ fillFan 0 [0..255]
PackIndex <$> UV.unsafeFreeze fan
<*> V.unsafeFreeze shas
<*> UV.unsafeFreeze crcs
<*> UV.unsafeFreeze offs
<*> pure (UV.create $ do
v <- UM.unsafeNew (fromIntegral boc)
sequence_ [UM.unsafeWrite v (fromIntegral bi) bo | (bi, bo) <- bol]
return v)
where sz = M.size idx
buildPackIndex :: PackIndex -> Builder
buildPackIndex (PackIndex fan shas crcs offs boffs) =
BB.byteString (B.pack [0xff, 0x74, 0x4f, 0x63])
<> BB.word32BE 2
<> foldMap BB.word32BE (UV.toList fan)
<> foldMap (BB.byteString . getSha1) shas
<> foldMap BB.word32BE (UV.toList crcs)
<> foldMap BB.word32BE (UV.toList offs)
<> foldMap BB.word64BE (UV.toList boffs)
newtype PackingT m a = PackingT { unPackingT :: ReaderT (MVar PackingState) m a }
deriving (Functor, Applicative, Monad, MonadIO, MonadReader (MVar PackingState)
,MonadTrans, MonadFail)
runPackingT :: MonadIO m => (PackFile -> m ()) -> RawFilePath -> PackingT m a -> m a
runPackingT reg p pma = do
mvar <- liftIO $ newPackFile p >>= newMVar
runReaderT (unPackingT $ pma >>= \(!a) -> finishPacking >> return a) mvar
where finishPacking = ask >>= liftIO . (`withMVar` finishPackFile) >>= lift . maybe (pure ()) reg
flushPackFile :: MonadIO m => (PackFile -> m ()) -> PackingT m ()
flushPackFile reg = ask >>= liftIO . (`modifyMVar` go) >>= lift . maybe (pure ()) reg
where go ps = do
p <- finishPackFile ps
n <- newPackFile (tempTemplate (psTempFile ps))
pure (n, p)
packObject :: MonadIO m => Object -> PackingT m Sha1
packObject o = ask >>= liftIO . (`modifyMVar` go)
where go !ps@(PackingState tmp n idx off) | hash `M.member` idx = pure (ps, hash)
| otherwise = do
let objd = BB.toLazyByteString $ buildPackedObject o
off' = off + fromIntegral (BL.length objd)
crc = crc32 objd
idx' = M.insert hash (IndexData crc off) idx
BL.hPut (tempHandle tmp) objd
return $ (PackingState tmp (n + 1) idx' off', hash)
hash = sha1 o
writePackFile :: MonadIO m => (PackFile -> m ()) -> RawFilePath -> [Object] -> m ()
writePackFile reg p os = runPackingT reg p $ mapM_ packObject os
finishPackFile :: MonadIO m => PackingState -> m (Maybe PackFile)
finishPackFile (PackingState tmp 0 _ _ ) =
liftIO (closeTempFile tmp Nothing) >> pure Nothing
finishPackFile (PackingState tmp n idx _) = liftIO $ do
let h = tempHandle tmp
len <- fromIntegral <$> hTell h
hSeek h AbsoluteSeek 8
BB.hPutBuilder h $ BB.word32BE n
hSeek h AbsoluteSeek 0
!hash <- sha1 <$> BL.hGet h len
hSeek h SeekFromEnd 0
B.hPut h $ getSha1 hash
let !baseFilename = takeDirectory (tempFileName tmp) </> ("pack-" <> getSha1Hex (toHex hash))
closeTempFile tmp (Just $ baseFilename <.> "pack")
pnm <- withHandleAtomic (tempFileName tmp) $ \ih -> do
let built = buildPackIndex $ makePackIndex idx
idxbs = BB.toLazyByteString $ built <> (BB.byteString . getSha1 $ hash)
idxhash = sha1 idxbs
BL.hPut ih idxbs
B.hPut ih $ getSha1 idxhash
return (Just $ baseFilename <.> ".idx", baseFilename)
Just <$> readPackFile pnm
newPackFile :: RawFilePath -> IO PackingState
newPackFile p = do
!tmp <- tempFile p
BB.hPutBuilder (tempHandle tmp) $ "PACK" <> BB.word32BE 2 <> BB.word32BE 0
return $ PackingState tmp 0 mempty 12