module Darcs.Util.ByteString (
unsafeWithInternals,
unpackPSFromUTF8,
packStringToUTF8,
gzReadFilePS,
mmapFilePS,
gzWriteFilePS,
gzWriteFilePSs,
gzReadStdin,
gzWriteHandle,
FileSegment,
readSegment,
isGZFile,
gzDecompress,
dropSpace,
breakSpace,
linesPS,
unlinesPS,
hashPS,
breakFirstPS,
breakLastPS,
substrPS,
readIntPS,
isFunky,
fromHex2PS,
fromPS2Hex,
betweenLinesPS,
breakAfterNthNewline,
breakBeforeNthNewline,
intercalate,
isAscii,
decodeLocale,
encodeLocale,
decodeString
) where
import Prelude ()
import Darcs.Prelude
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy.Char8 as BL8
import Data.ByteString (intercalate)
import Data.ByteString.Internal (fromForeignPtr)
import Control.Exception ( catch, SomeException )
import System.IO ( withFile, IOMode(ReadMode)
, hSeek, SeekMode(SeekFromEnd,AbsoluteSeek)
, openBinaryFile, hClose, Handle, hGetChar
, stdin)
import System.IO.Unsafe ( unsafePerformIO )
import Foreign.Storable ( peek )
import Foreign.Marshal.Array ( advancePtr )
import Foreign.C.Types ( CInt(..) )
import Data.Bits ( rotateL )
import Data.Char ( ord, isSpace )
import Data.Word ( Word8 )
import Data.Int ( Int32, Int64 )
import qualified Data.Text as T ( pack, unpack )
import Data.Text.Encoding ( encodeUtf8, decodeUtf8With )
import Data.Text.Encoding.Error ( lenientDecode )
import Control.Monad ( when )
#if MIN_VERSION_zlib(0,6,0)
import Control.Monad.ST.Lazy ( ST )
#endif
import Foreign.Ptr ( plusPtr, Ptr )
import Foreign.ForeignPtr ( withForeignPtr )
#ifdef DEBUG_PS
import Foreign.ForeignPtr ( addForeignPtrFinalizer )
import Foreign.Ptr ( FunPtr )
#endif
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.GZip as GZ
import qualified Codec.Compression.Zlib.Internal as ZI
import Darcs.Util.Encoding ( decode, encode )
import Darcs.Util.Global ( addCRCWarning )
#if mingw32_HOST_OS
#else
import System.IO.MMap( mmapFileByteString )
import System.Posix.Files( fileSize, getSymbolicLinkStatus )
#endif
import System.Mem( performGC )
import qualified Bundled.Posix as Bundled ( getFileStatus, fileSize )
unsafeWithInternals :: B.ByteString -> (Ptr Word8 -> Int -> IO a) -> IO a
unsafeWithInternals ps f
= case BI.toForeignPtr ps of
(fp,s,l) -> withForeignPtr fp $ \p -> f (p `plusPtr` s) l
readIntPS :: B.ByteString -> Maybe (Int, B.ByteString)
readIntPS = BC.readInt . BC.dropWhile isSpace
unpackPSFromUTF8 :: B.ByteString -> String
unpackPSFromUTF8 = T.unpack . decodeUtf8With lenientDecode
packStringToUTF8 :: String -> B.ByteString
packStringToUTF8 = encodeUtf8 . T.pack
isSpaceWord8 :: Word8 -> Bool
isSpaceWord8 = (`elem` [0x20, 0x09, 0x0A, 0x0D])
dropSpace :: B.ByteString -> B.ByteString
dropSpace bs = B.dropWhile isSpaceWord8 bs
breakSpace :: B.ByteString -> (B.ByteString, B.ByteString)
breakSpace bs = B.break isSpaceWord8 bs
isFunky :: B.ByteString -> Bool
isFunky ps = case BI.toForeignPtr ps of
(x,s,l) ->
unsafePerformIO $ withForeignPtr x $ \p->
(/=0) `fmap` has_funky_char (p `plusPtr` s) (fromIntegral l)
foreign import ccall unsafe "fpstring.h has_funky_char" has_funky_char
:: Ptr Word8 -> CInt -> IO CInt
hashPS :: B.ByteString -> Int32
hashPS ps =
case BI.toForeignPtr ps of
(x,s,l) ->
unsafePerformIO $ withForeignPtr x $ \p->
hash (p `plusPtr` s) l
hash :: Ptr Word8 -> Int -> IO Int32
hash = f (0 :: Int32)
where f h _ 0 = return h
f h p n = do x <- peek p
let !h' = fromIntegral x + rotateL h 8
f h' (p `advancePtr` 1) (n1)
substrPS :: B.ByteString -> B.ByteString -> Maybe Int
substrPS tok str
| B.null tok = Just 0
| B.length tok > B.length str = Nothing
| otherwise = do n <- BC.elemIndex (BC.head tok) str
let ttok = B.tail tok
reststr = B.drop (n+1) str
if ttok == B.take (B.length ttok) reststr
then Just n
else ((n+1)+) `fmap` substrPS tok reststr
breakFirstPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakFirstPS c p = case BC.elemIndex c p of
Nothing -> Nothing
Just n -> Just (B.take n p, B.drop (n+1) p)
breakLastPS :: Char -> B.ByteString -> Maybe (B.ByteString,B.ByteString)
breakLastPS c p = case BC.elemIndexEnd c p of
Nothing -> Nothing
Just n -> Just (B.take n p, B.drop (n+1) p)
linesPS :: B.ByteString -> [B.ByteString]
linesPS ps
| B.null ps = [B.empty]
| otherwise = BC.split '\n' ps
unlinesPS :: [B.ByteString] -> B.ByteString
unlinesPS [] = BC.empty
unlinesPS x = BC.init $ BC.unlines x
gzDecompress :: Maybe Int -> BL.ByteString -> ([B.ByteString], Bool)
gzDecompress mbufsize =
#if MIN_VERSION_zlib(0,6,0)
decompressWarn (ZI.decompressST ZI.gzipFormat decompressParams)
#else
toListWarn . ZI.decompressWithErrors ZI.gzipFormat decompressParams
#endif
where
decompressParams = case mbufsize of
Just bufsize -> GZ.defaultDecompressParams { GZ.decompressBufferSize = bufsize }
Nothing -> GZ.defaultDecompressParams
#if MIN_VERSION_zlib(0,6,0)
decompressWarn :: (forall s . ZI.DecompressStream (ST s)) -> BL.ByteString -> ([B.ByteString], Bool)
decompressWarn = ZI.foldDecompressStreamWithInput
(\x ~(xs, b) -> (x:xs, b))
(\xs -> if BL.null xs
then ([], False)
else error "trailing data at end of compressed stream"
)
handleBad
#else
toListWarn :: ZI.DecompressStream -> ([B.ByteString], Bool)
toListWarn = foldDecompressStream (\x ~(xs, b) -> (x:xs, b)) ([], False) handleBad
foldDecompressStream :: (B.ByteString -> a -> a) -> a
-> (ZI.DecompressError -> String -> a)
-> ZI.DecompressStream -> a
foldDecompressStream chunk end err = fold
where
fold ZI.StreamEnd = end
fold (ZI.StreamChunk bs stream) = chunk bs (fold stream)
fold (ZI.StreamError code msg) = err code msg
#endif
#if MIN_VERSION_zlib(0,6,0)
handleBad (ZI.DataFormatError "incorrect data check") = ([], True)
handleBad e = error (show e)
#else
handleBad ZI.DataError "incorrect data check" = ([], True)
handleBad _ msg = error msg
#endif
isGZFile :: FilePath -> IO (Maybe Int)
isGZFile f = do
h <- openBinaryFile f ReadMode
header <- B.hGet h 2
if header /= BC.pack "\31\139"
then do hClose h
return Nothing
else do hSeek h SeekFromEnd (4)
len <- hGetLittleEndInt h
hClose h
return (Just len)
gzReadFilePS :: FilePath -> IO B.ByteString
gzReadFilePS f = do
mlen <- isGZFile f
case mlen of
Nothing -> mmapFilePS f
Just len ->
do
let doDecompress buf = let (res, bad) = gzDecompress (Just len) buf
in do when bad $ addCRCWarning f
return res
compressed <- (BL.fromChunks . return) `fmap` mmapFilePS f
B.concat `fmap` doDecompress compressed
hGetLittleEndInt :: Handle -> IO Int
hGetLittleEndInt h = do
b1 <- ord `fmap` hGetChar h
b2 <- ord `fmap` hGetChar h
b3 <- ord `fmap` hGetChar h
b4 <- ord `fmap` hGetChar h
return $ b1 + 256*b2 + 65536*b3 + 16777216*b4
gzWriteFilePS :: FilePath -> B.ByteString -> IO ()
gzWriteFilePS f ps = gzWriteFilePSs f [ps]
gzWriteFilePSs :: FilePath -> [B.ByteString] -> IO ()
gzWriteFilePSs f pss =
BL.writeFile f $ GZ.compress $ BL.fromChunks pss
gzWriteHandle :: Handle -> [B.ByteString] -> IO ()
gzWriteHandle h pss =
BL.hPut h $ GZ.compress $ BL.fromChunks pss
gzReadStdin :: IO B.ByteString
gzReadStdin = do
header <- B.hGet stdin 2
rest <- B.hGetContents stdin
let allStdin = B.concat [header,rest]
return $
if header /= BC.pack "\31\139"
then allStdin
else let decompress = fst . gzDecompress Nothing
compressed = BL.fromChunks [allStdin]
in
B.concat $ decompress compressed
type FileSegment = (FilePath, Maybe (Int64, Int))
readSegment :: FileSegment -> IO BL.ByteString
readSegment (f,range) = do
bs <- tryToRead
`catch` (\(_::SomeException) -> do
size <- Bundled.fileSize `fmap` Bundled.getFileStatus f
if size == 0
then return BC.empty
else performGC >> tryToRead)
return $ BL8.fromChunks [bs]
where
tryToRead =
case range of
Nothing -> B.readFile f
Just (off, size) -> withFile f ReadMode $ \h -> do
hSeek h AbsoluteSeek $ fromIntegral off
B.hGet h size
mmapFilePS :: FilePath -> IO B.ByteString
#if mingw32_HOST_OS
mmapFilePS = B.readFile
#else
mmapFilePS f =
mmapFileByteString f Nothing
`catch` (\(_ :: SomeException) -> do
size <- fileSize `fmap` getSymbolicLinkStatus f
if size == 0
then return B.empty
else performGC >> mmapFileByteString f Nothing)
#endif
foreign import ccall unsafe "static fpstring.h conv_to_hex" conv_to_hex
:: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
fromPS2Hex :: B.ByteString -> B.ByteString
fromPS2Hex ps = case BI.toForeignPtr ps of
(x,s,l) ->
BI.unsafeCreate (2*l) $ \p -> withForeignPtr x $ \f ->
conv_to_hex p (f `plusPtr` s) $ fromIntegral l
foreign import ccall unsafe "static fpstring.h conv_from_hex" conv_from_hex
:: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
fromHex2PS :: B.ByteString -> B.ByteString
fromHex2PS ps = case BI.toForeignPtr ps of
(x,s,l) ->
BI.unsafeCreate (l `div` 2) $ \p -> withForeignPtr x $ \f ->
conv_from_hex p (f `plusPtr` s) (fromIntegral $ l `div` 2)
betweenLinesPS :: B.ByteString -> B.ByteString -> B.ByteString
-> Maybe B.ByteString
betweenLinesPS start end ps
= case break (start ==) (linesPS ps) of
(_, _:rest@(bs1:_)) ->
case BI.toForeignPtr bs1 of
(ps1,s1,_) ->
case break (end ==) rest of
(_, bs2:_) -> case BI.toForeignPtr bs2 of (_,s2,_) -> Just $ fromForeignPtr ps1 s1 (s2 s1)
_ -> Nothing
_ -> Nothing
breakAfterNthNewline :: Int -> B.ByteString
-> Maybe (B.ByteString, B.ByteString)
breakAfterNthNewline 0 the_ps | B.null the_ps = Just (B.empty, B.empty)
| otherwise = Just (B.empty, the_ps)
breakAfterNthNewline n the_ps =
go n (B.elemIndices (BI.c2w '\n') the_ps)
where go 0 [] = Just (the_ps, B.empty)
go _ [] = Nothing
go 1 (i:_) = Just $ B.splitAt (i+1) the_ps
go !m (_:is) = go (m1) is
breakBeforeNthNewline :: Int -> B.ByteString -> (B.ByteString, B.ByteString)
breakBeforeNthNewline 0 the_ps
| B.null the_ps = (B.empty, B.empty)
breakBeforeNthNewline n the_ps =
go n (B.elemIndices (BI.c2w '\n') the_ps)
where go _ [] = (the_ps, B.empty)
go 0 (i:_) = B.splitAt i the_ps
go !m (_:is) = go (m1) is
isAscii :: B.ByteString -> Bool
isAscii = B.all (< 128)
decodeLocale :: B.ByteString -> String
decodeLocale = unsafePerformIO . decode
encodeChar8 :: String -> B.ByteString
encodeChar8 = B.pack . map (fromIntegral . ord)
encodeLocale :: String -> B.ByteString
encodeLocale = unsafePerformIO . encode
decodeString :: String -> IO String
decodeString = decode . encodeChar8