{-# LANGUAGE CPP, BangPatterns, PatternGuards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, DeriveDataTypeable #-}
module Codec.Archive.Tar.Index (
TarIndex,
lookup,
TarIndexEntry(..),
toList,
TarEntryOffset,
hReadEntry,
hReadEntryHeader,
build,
IndexBuilder,
empty,
addNextEntry,
skipNextEntry,
finalise,
unfinalise,
serialise,
deserialise,
hReadEntryHeaderOrEof,
hSeekEntryOffset,
hSeekEntryContentOffset,
hSeekEndEntryOffset,
nextEntryOffset,
indexEndEntryOffset,
indexNextEntryOffset,
emptyIndex,
finaliseIndex,
#ifdef TESTS
prop_lookup,
prop_toList,
prop_valid,
prop_serialise_deserialise,
prop_serialiseSize,
prop_index_matches_tar,
prop_finalise_unfinalise,
#endif
) where
import Data.Typeable (Typeable)
import Codec.Archive.Tar.Types as Tar
import Codec.Archive.Tar.Read as Tar
import qualified Codec.Archive.Tar.Index.StringTable as StringTable
import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder)
import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder)
import qualified System.FilePath.Posix as FilePath
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import Data.Word
import Data.Int
import Data.Bits
import qualified Data.Array.Unboxed as A
import Prelude hiding (lookup)
import System.IO
import Control.Exception (assert, throwIO)
import Control.DeepSeq
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Unsafe as BS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder as BS
import Data.ByteString.Builder.Extra as BS (toLazyByteStringWith,
untrimmedStrategy)
#else
import Data.ByteString.Lazy.Builder as BS
import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith,
untrimmedStrategy)
#endif
#ifdef TESTS
import qualified Prelude
import Test.QuickCheck
import Test.QuickCheck.Property (ioProperty)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (unless)
import Data.List (nub, sort, sortBy, stripPrefix, isPrefixOf)
import Data.Maybe
import Data.Function (on)
import Control.Exception (SomeException, try)
import Codec.Archive.Tar.Write as Tar
import qualified Data.ByteString.Handle as HBS
#endif
data TarIndex = TarIndex
{-# UNPACK #-} !(StringTable PathComponentId)
{-# UNPACK #-} !(IntTrie PathComponentId TarEntryOffset)
{-# UNPACK #-} !TarEntryOffset
deriving (Eq, Show, Typeable)
instance NFData TarIndex where
rnf (TarIndex _ _ _) = ()
data TarIndexEntry = TarFileEntry {-# UNPACK #-} !TarEntryOffset
| TarDir [(FilePath, TarIndexEntry)]
deriving (Show, Typeable)
newtype PathComponentId = PathComponentId Int
deriving (Eq, Ord, Enum, Show, Typeable)
type TarEntryOffset = Word32
lookup :: TarIndex -> FilePath -> Maybe TarIndexEntry
lookup (TarIndex pathTable pathTrie _) path = do
fpath <- toComponentIds pathTable path
tentry <- IntTrie.lookup pathTrie fpath
return (mkIndexEntry tentry)
where
mkIndexEntry (IntTrie.Entry offset) = TarFileEntry offset
mkIndexEntry (IntTrie.Completions entries) =
TarDir [ (fromComponentId pathTable key, mkIndexEntry entry)
| (key, entry) <- entries ]
toComponentIds :: StringTable PathComponentId -> FilePath -> Maybe [PathComponentId]
toComponentIds table =
lookupComponents []
. filter (/= BS.Char8.singleton '.')
. splitDirectories
. BS.Char8.pack
where
lookupComponents cs' [] = Just (reverse cs')
lookupComponents cs' (c:cs) = case StringTable.lookup table c of
Nothing -> Nothing
Just cid -> lookupComponents (cid:cs') cs
fromComponentId :: StringTable PathComponentId -> PathComponentId -> FilePath
fromComponentId table = BS.Char8.unpack . StringTable.index table
toList :: TarIndex -> [(FilePath, TarEntryOffset)]
toList (TarIndex pathTable pathTrie _) =
[ (path, off)
| (cids, off) <- IntTrie.toList pathTrie
, let path = FilePath.joinPath (map (fromComponentId pathTable) cids) ]
build :: Entries e -> Either e TarIndex
build = go empty
where
go !builder (Next e es) = go (addNextEntry e builder) es
go !builder Done = Right $! finalise builder
go !_ (Fail err) = Left err
data IndexBuilder
= IndexBuilder !(StringTableBuilder PathComponentId)
!(IntTrieBuilder PathComponentId TarEntryOffset)
{-# UNPACK #-} !TarEntryOffset
deriving (Eq, Show)
instance NFData IndexBuilder where
rnf (IndexBuilder _ _ _) = ()
empty :: IndexBuilder
empty = IndexBuilder StringTable.empty IntTrie.empty 0
emptyIndex :: IndexBuilder
emptyIndex = empty
{-# DEPRECATED emptyIndex "Use TarIndex.empty" #-}
addNextEntry :: Entry -> IndexBuilder -> IndexBuilder
addNextEntry entry (IndexBuilder stbl itrie nextOffset) =
IndexBuilder stbl' itrie'
(nextEntryOffset entry nextOffset)
where
!entrypath = splitTarPath (entryTarPath entry)
(stbl', cids) = StringTable.inserts entrypath stbl
itrie' = IntTrie.insert cids nextOffset itrie
skipNextEntry :: Entry -> IndexBuilder -> IndexBuilder
skipNextEntry entry (IndexBuilder stbl itrie nextOffset) =
IndexBuilder stbl itrie (nextEntryOffset entry nextOffset)
finalise :: IndexBuilder -> TarIndex
finalise (IndexBuilder stbl itrie finalOffset) =
TarIndex pathTable pathTrie finalOffset
where
pathTable = StringTable.finalise stbl
pathTrie = IntTrie.finalise itrie
finaliseIndex :: IndexBuilder -> TarIndex
finaliseIndex = finalise
{-# DEPRECATED finaliseIndex "Use TarIndex.finalise" #-}
indexNextEntryOffset :: IndexBuilder -> TarEntryOffset
indexNextEntryOffset (IndexBuilder _ _ off) = off
indexEndEntryOffset :: TarIndex -> TarEntryOffset
indexEndEntryOffset (TarIndex _ _ off) = off
nextEntryOffset :: Entry -> TarEntryOffset -> TarEntryOffset
nextEntryOffset entry offset =
offset
+ 1
+ case entryContent entry of
NormalFile _ size -> blocks size
OtherEntryType _ _ size -> blocks size
_ -> 0
where
blocks :: Int64 -> TarEntryOffset
blocks size = fromIntegral (1 + (size - 1) `div` 512)
type FilePathBS = BS.ByteString
splitTarPath :: TarPath -> [FilePathBS]
splitTarPath (TarPath name prefix) =
splitDirectories prefix ++ splitDirectories name
splitDirectories :: FilePathBS -> [FilePathBS]
splitDirectories bs =
case BS.Char8.split '/' bs of
c:cs | BS.null c -> BS.Char8.singleton '/' : filter (not . BS.null) cs
cs -> filter (not . BS.null) cs
unfinalise :: TarIndex -> IndexBuilder
unfinalise (TarIndex pathTable pathTrie finalOffset) =
IndexBuilder (StringTable.unfinalise pathTable)
(IntTrie.unfinalise pathTrie)
finalOffset
hReadEntry :: Handle -> TarEntryOffset -> IO Entry
hReadEntry hnd off = do
entry <- hReadEntryHeader hnd off
case entryContent entry of
NormalFile _ size -> do body <- LBS.hGet hnd (fromIntegral size)
return entry {
entryContent = NormalFile body size
}
OtherEntryType c _ size -> do body <- LBS.hGet hnd (fromIntegral size)
return entry {
entryContent = OtherEntryType c body size
}
_ -> return entry
hReadEntryHeader :: Handle -> TarEntryOffset -> IO Entry
hReadEntryHeader hnd blockOff = do
hSeekEntryOffset hnd blockOff
header <- LBS.hGet hnd 512
case Tar.read header of
Tar.Next entry _ -> return entry
Tar.Fail e -> throwIO e
Tar.Done -> fail "hReadEntryHeader: impossible"
hSeekEntryOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryOffset hnd blockOff =
hSeek hnd AbsoluteSeek (fromIntegral blockOff * 512)
hSeekEntryContentOffset :: Handle -> TarEntryOffset -> IO ()
hSeekEntryContentOffset hnd blockOff =
hSeekEntryOffset hnd (blockOff + 1)
hReadEntryHeaderOrEof :: Handle -> TarEntryOffset
-> IO (Maybe (Entry, TarEntryOffset))
hReadEntryHeaderOrEof hnd blockOff = do
hSeekEntryOffset hnd blockOff
header <- LBS.hGet hnd 1024
case Tar.read header of
Tar.Next entry _ -> let !blockOff' = nextEntryOffset entry blockOff
in return (Just (entry, blockOff'))
Tar.Done -> return Nothing
Tar.Fail e -> throwIO e
hSeekEndEntryOffset :: Handle -> Maybe TarIndex -> IO TarEntryOffset
hSeekEndEntryOffset hnd (Just index) = do
let offset = indexEndEntryOffset index
hSeekEntryOffset hnd offset
return offset
hSeekEndEntryOffset hnd Nothing = do
size <- hFileSize hnd
if size == 0
then return 0
else seekToEnd 0
where
seekToEnd offset = do
mbe <- hReadEntryHeaderOrEof hnd offset
case mbe of
Nothing -> do hSeekEntryOffset hnd offset
return offset
Just (_, offset') -> seekToEnd offset'
serialise :: TarIndex -> BS.ByteString
serialise = toStrict . serialiseLBS
serialiseLBS :: TarIndex -> LBS.ByteString
serialiseLBS index =
BS.toLazyByteStringWith
(BS.untrimmedStrategy (serialiseSize index) 512) LBS.empty
(serialiseBuilder index)
serialiseSize :: TarIndex -> Int
serialiseSize (TarIndex stringTable intTrie _) =
StringTable.serialiseSize stringTable
+ IntTrie.serialiseSize intTrie
+ 8
serialiseBuilder :: TarIndex -> BS.Builder
serialiseBuilder (TarIndex stringTable intTrie finalOffset) =
BS.word32BE 2
<> BS.word32BE finalOffset
<> StringTable.serialise stringTable
<> IntTrie.serialise intTrie
deserialise :: BS.ByteString -> Maybe (TarIndex, BS.ByteString)
deserialise bs
| BS.length bs < 8
= Nothing
| let ver = readWord32BE bs 0
, ver == 1
= do let !finalOffset = readWord32BE bs 4
(stringTable, bs') <- StringTable.deserialiseV1 (BS.drop 8 bs)
(intTrie, bs'') <- IntTrie.deserialise bs'
return (TarIndex stringTable intTrie finalOffset, bs'')
| let ver = readWord32BE bs 0
, ver == 2
= do let !finalOffset = readWord32BE bs 4
(stringTable, bs') <- StringTable.deserialiseV2 (BS.drop 8 bs)
(intTrie, bs'') <- IntTrie.deserialise bs'
return (TarIndex stringTable intTrie finalOffset, bs'')
| otherwise = Nothing
readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE bs i =
assert (i >= 0 && i+3 <= BS.length bs - 1) $
fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24
+ fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16
+ fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8
+ fromIntegral (BS.unsafeIndex bs (i + 3))
#ifdef TESTS
prop_lookup :: ValidPaths -> NonEmptyFilePath -> Bool
prop_lookup (ValidPaths paths) (NonEmptyFilePath p) =
case (lookup index p, Prelude.lookup p paths) of
(Nothing, Nothing) -> True
(Just (TarFileEntry offset), Just (_,offset')) -> offset == offset'
(Just (TarDir entries), Nothing) -> sort (nub (map fst entries))
== sort (nub completions)
_ -> False
where
index = construct paths
completions = [ head (FilePath.splitDirectories completion)
| (path,_) <- paths
, completion <- maybeToList $ stripPrefix (p ++ "/") path ]
prop_toList :: ValidPaths -> Bool
prop_toList (ValidPaths paths) =
sort (toList index)
== sort [ (path, off) | (path, (_sz, off)) <- paths ]
where
index = construct paths
prop_valid :: ValidPaths -> Bool
prop_valid (ValidPaths paths)
| not $ StringTable.prop_valid pathbits = error "TarIndex: bad string table"
| not $ IntTrie.prop_lookup intpaths = error "TarIndex: bad int trie"
| not $ IntTrie.prop_completions intpaths = error "TarIndex: bad int trie"
| not $ prop' = error "TarIndex: bad prop"
| otherwise = True
where
index@(TarIndex pathTable _ _) = construct paths
pathbits = concatMap (map BS.Char8.pack . FilePath.splitDirectories . fst)
paths
intpaths = [ (cids, offset)
| (path, (_size, offset)) <- paths
, let Just cids = toComponentIds pathTable path ]
prop' = flip all paths $ \(file, (_size, offset)) ->
case lookup index file of
Just (TarFileEntry offset') -> offset' == offset
_ -> False
prop_serialise_deserialise :: ValidPaths -> Bool
prop_serialise_deserialise (ValidPaths paths) =
Just (index, BS.empty) == (deserialise . serialise) index
where
index = construct paths
prop_serialiseSize :: ValidPaths -> Bool
prop_serialiseSize (ValidPaths paths) =
case (LBS.toChunks . serialiseLBS) index of
[c1] -> BS.length c1 == serialiseSize index
_ -> False
where
index = construct paths
newtype NonEmptyFilePath = NonEmptyFilePath FilePath deriving Show
instance Arbitrary NonEmptyFilePath where
arbitrary = NonEmptyFilePath . FilePath.joinPath
<$> listOf1 (elements ["a", "b", "c", "d"])
newtype ValidPaths = ValidPaths [(FilePath, (Int64, TarEntryOffset))] deriving Show
instance Arbitrary ValidPaths where
arbitrary = do
paths <- makeNoPrefix <$> listOf arbitraryPath
sizes <- vectorOf (length paths) (getNonNegative <$> arbitrary)
let offsets = scanl (\o sz -> o + 1 + blocks sz) 0 sizes
return (ValidPaths (zip paths (zip sizes offsets)))
where
arbitraryPath = FilePath.joinPath
<$> listOf1 (elements ["a", "b", "c", "d"])
makeNoPrefix [] = []
makeNoPrefix (k:ks)
| all (not . isPrefixOfOther k) ks
= k : makeNoPrefix ks
| otherwise = makeNoPrefix ks
isPrefixOfOther a b = a `isPrefixOf` b || b `isPrefixOf` a
blocks :: Int64 -> TarEntryOffset
blocks size = fromIntegral (1 + ((size - 1) `div` 512))
construct :: [(FilePath, (Int64, TarEntryOffset))] -> TarIndex
construct =
either (\_ -> undefined) id
. build
. foldr (\(path, (size, _off)) es -> Next (testEntry path size) es) Done
example0 :: Entries ()
example0 =
testEntry "foo-1.0/foo-1.0.cabal" 1500
`Next` testEntry "foo-1.0/LICENSE" 2000
`Next` testEntry "foo-1.0/Data/Foo.hs" 1000
`Next` Done
example1 :: Entries ()
example1 =
Next (testEntry "./" 1500) Done <> example0
testEntry :: FilePath -> Int64 -> Entry
testEntry name size = simpleEntry path (NormalFile mempty size)
where
Right path = toTarPath False name
data SimpleTarArchive = SimpleTarArchive {
simpleTarEntries :: Tar.Entries ()
, simpleTarRaw :: [(FilePath, LBS.ByteString)]
, simpleTarBS :: LBS.ByteString
}
instance Show SimpleTarArchive where
show = show . simpleTarRaw
prop_index_matches_tar :: SimpleTarArchive -> Property
prop_index_matches_tar sta =
ioProperty (try go >>= either (\e -> throwIO (e :: SomeException))
(\_ -> return True))
where
go :: IO ()
go = do
h <- HBS.readHandle True (simpleTarBS sta)
goEntries h 0 (simpleTarEntries sta)
goEntries :: Handle -> TarEntryOffset -> Tar.Entries () -> IO ()
goEntries _ _ Tar.Done =
return ()
goEntries _ _ (Tar.Fail _) =
throwIO (userError "Fail entry in SimpleTarArchive")
goEntries h offset (Tar.Next e es) = do
goEntry h offset e
goEntries h (nextEntryOffset e offset) es
goEntry :: Handle -> TarEntryOffset -> Tar.Entry -> IO ()
goEntry h offset e = do
e' <- hReadEntry h offset
case (Tar.entryContent e, Tar.entryContent e') of
(Tar.NormalFile bs sz, Tar.NormalFile bs' sz') ->
unless (sz == sz' && bs == bs') $
throwIO $ userError "Entry mismatch"
_otherwise ->
throwIO $ userError "unexpected entry types"
instance Arbitrary SimpleTarArchive where
arbitrary = do
numEntries <- sized $ \n -> choose (0, n)
rawEntries <- mkRaw numEntries
let entries = mkList rawEntries
return SimpleTarArchive {
simpleTarEntries = mkEntries entries
, simpleTarRaw = rawEntries
, simpleTarBS = Tar.write entries
}
where
mkRaw :: Int -> Gen [(FilePath, LBS.ByteString)]
mkRaw 0 = return []
mkRaw n = do
sz <- sized $ \n -> elements (take n fileSizes)
bs <- LBS.pack `fmap` vectorOf sz arbitrary
es <- mkRaw (n - 1)
return $ ("file" ++ show n, bs) : es
mkList :: [(FilePath, LBS.ByteString)] -> [Tar.Entry]
mkList [] = []
mkList ((fp, bs):es) = entry : mkList es
where
Right path = toTarPath False fp
entry = simpleEntry path content
content = NormalFile bs (LBS.length bs)
mkEntries :: [Tar.Entry] -> Tar.Entries ()
mkEntries [] = Tar.Done
mkEntries (e:es) = Tar.Next e (mkEntries es)
fileSizes :: [Int]
fileSizes = [
0 , 1 , 2
, 510 , 511 , 512 , 513 , 514
, 1022 , 1023 , 1024 , 1025 , 1026
]
newtype SimpleIndexBuilder = SimpleIndexBuilder IndexBuilder
deriving Show
instance Arbitrary SimpleIndexBuilder where
arbitrary = SimpleIndexBuilder . build' . simpleTarEntries <$> arbitrary
where
build' :: Show e => Entries e -> IndexBuilder
build' = go empty
where
go !builder (Next e es) = go (addNextEntry e builder) es
go !builder Done = builder
go !_ (Fail err) = error (show err)
prop_finalise_unfinalise :: SimpleIndexBuilder -> Bool
prop_finalise_unfinalise (SimpleIndexBuilder index) =
unfinalise (finalise index) == index
#endif
toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict = LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif
#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif