#include "inline.hs" -- | -- Module : Streamly.Internal.FileSystem.Dir -- Copyright : (c) 2018 Composewell Technologies -- -- License : BSD3 -- Maintainer : streamly@composewell.com -- Portability : GHC module Streamly.Internal.FileSystem.Dir ( -- * Streams read -- read not just the names but also the inode attrs of the children. This -- abstraction makes sense because when we read the dir contents we also -- get the inodes, and it is cheaper to get the attrs from the inodes -- instead of resolving the paths and get those. This abstraction may be -- less portable as different platforms may have different attrs. To -- optimize, we can also add a filter/pattern/parser on the names of the -- children that we want to read. We can call that readAttrsWith? Or just -- have the default readAttrs do that? Usually we won't need that, so it -- may be better to keep that a separate API. -- , readAttrs -- recursive read requires us to read the attributes of the children to -- determine if something is a dirctory or not. Therefore, it may be a good -- idea to have a low level routine that also spits out the attributes of -- the files, we get that for free. We can also add a filter/pattern/parser -- on the names of the children that we want to read. --, readAttrsRecursive -- Options: acyclic, follow symlinks , readFiles , readDirs , readEither , readEitherPaths -- We can implement this in terms of readAttrsRecursive without losing -- perf. -- , readEitherRecursive -- Options: acyclic, follow symlinks -- , readAncestors -- read the parent chain using the .. entry. -- , readAncestorsAttrs -- * Unfolds -- | Use the more convenient stream APIs instead of unfolds where possible. 
    , reader
    , fileReader
    , dirReader
    , eitherReader
    , eitherReaderPaths

    {-
    , toStreamWithBufferOf

    , readChunks
    , readChunksWithBufferOf

    , toChunksWithBufferOf
    , toChunks

    , write
    , writeWithBufferOf

    -- Byte stream write (Streams)
    , fromStream
    , fromStreamWithBufferOf

    -- -- * Array Write
    , writeArray
    , writeChunks
    , writeChunksWithBufferOf

    -- -- * Array stream Write
    , fromChunks
    , fromChunksWithBufferOf
    -}
    -- * Deprecated
    , toStream
    , toEither
    , toFiles
    , toDirs
    )
where

import Control.Monad.Catch (MonadCatch)
import Control.Monad.IO.Class (MonadIO(..))
import Data.Bifunctor (bimap)
import Data.Either (isRight, isLeft, fromLeft, fromRight)
import Data.Function ((&))
import Streamly.Data.Stream (Stream)
import Streamly.Internal.Data.Unfold (Step(..))
import Streamly.Internal.Data.Unfold.Type (Unfold(..))
import System.FilePath ((</>))
#if (defined linux_HOST_OS) || (defined darwin_HOST_OS)
import System.Posix (DirStream, openDirStream, readDirStream, closeDirStream)
#elif defined(mingw32_HOST_OS)
import qualified System.Win32 as Win32
#else
#error "Unsupported architecture"
#endif
import qualified Streamly.Data.Unfold as UF
import qualified Streamly.Internal.Data.Unfold as UF (mapM2, bracketIO)
import qualified Streamly.Data.Stream as S
import qualified System.Directory as Dir

import Prelude hiding (read)

{-
{-# INLINABLE readArrayUpto #-}
readArrayUpto :: Int -> Handle -> IO (Array Word8)
readArrayUpto size h = do
    ptr <- mallocPlainForeignPtrBytes size
    -- ptr <- mallocPlainForeignPtrAlignedBytes size (alignment (undefined :: Word8))
    withForeignPtr ptr $ \p -> do
        n <- hGetBufSome h p size
        let v = Array
                { aStart = ptr
                , arrEnd = p `plusPtr` n
                , arrBound = p `plusPtr` size
                }
        -- XXX shrink only if the diff is significant
        shrinkToFit v

-------------------------------------------------------------------------------
-- Stream of Arrays IO
-------------------------------------------------------------------------------

-- | @toChunksWithBufferOf size h@ reads a stream of arrays from
file handle @h@. -- The maximum size of a single array is specified by @size@. The actual size -- read may be less than or equal to @size@. {-# INLINE _toChunksWithBufferOf #-} _toChunksWithBufferOf :: MonadIO m => Int -> Handle -> Stream m (Array Word8) _toChunksWithBufferOf size h = go where -- XXX use cons/nil instead go = mkStream $ \_ yld _ stp -> do arr <- liftIO $ readArrayUpto size h if A.length arr == 0 then stp else yld arr go -- | @toChunksWithBufferOf size handle@ reads a stream of arrays from the file -- handle @handle@. The maximum size of a single array is limited to @size@. -- The actual size read may be less than or equal to @size@. -- -- @since 0.7.0 {-# INLINE_NORMAL toChunksWithBufferOf #-} toChunksWithBufferOf :: MonadIO m => Int -> Handle -> Stream m (Array Word8) toChunksWithBufferOf size h = D.fromStreamD (D.Stream step ()) where {-# INLINE_LATE step #-} step _ _ = do arr <- liftIO $ readArrayUpto size h return $ case A.length arr of 0 -> D.Stop _ -> D.Yield arr () -- | Unfold the tuple @(bufsize, handle)@ into a stream of 'Word8' arrays. -- Read requests to the IO device are performed using a buffer of size -- @bufsize@. The size of an array in the resulting stream is always less than -- or equal to @bufsize@. -- -- @since 0.7.0 {-# INLINE_NORMAL readChunksWithBufferOf #-} readChunksWithBufferOf :: MonadIO m => Unfold m (Int, Handle) (Array Word8) readChunksWithBufferOf = Unfold step return where {-# INLINE_LATE step #-} step (size, h) = do arr <- liftIO $ readArrayUpto size h return $ case A.length arr of 0 -> D.Stop _ -> D.Yield arr (size, h) -- XXX read 'Array a' instead of Word8 -- -- | @toChunks handle@ reads a stream of arrays from the specified file -- handle. The maximum size of a single array is limited to -- @defaultChunkSize@. The actual size read may be less than or equal to -- @defaultChunkSize@. 
-- -- > toChunks = toChunksWithBufferOf defaultChunkSize -- -- @since 0.7.0 {-# INLINE toChunks #-} toChunks :: MonadIO m => Handle -> Stream m (Array Word8) toChunks = toChunksWithBufferOf defaultChunkSize -- | Unfolds a handle into a stream of 'Word8' arrays. Requests to the IO -- device are performed using a buffer of size -- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'. The -- size of arrays in the resulting stream are therefore less than or equal to -- 'Streamly.Internal.Data.Array.Type.defaultChunkSize'. -- -- @since 0.7.0 {-# INLINE readChunks #-} readChunks :: MonadIO m => Unfold m Handle (Array Word8) readChunks = UF.first readChunksWithBufferOf defaultChunkSize ------------------------------------------------------------------------------- -- Read a Directory to Stream ------------------------------------------------------------------------------- -- TODO for concurrent streams implement readahead IO. We can send multiple -- read requests at the same time. For serial case we can use async IO. We can -- also control the read throughput in mbps or IOPS. -- | Unfolds the tuple @(bufsize, handle)@ into a byte stream, read requests -- to the IO device are performed using buffers of @bufsize@. -- -- @since 0.7.0 {-# INLINE readWithBufferOf #-} readWithBufferOf :: MonadIO m => Unfold m (Int, Handle) Word8 readWithBufferOf = UF.many readChunksWithBufferOf A.read -- | @toStreamWithBufferOf bufsize handle@ reads a byte stream from a file -- handle, reads are performed in chunks of up to @bufsize@. -- -- /Pre-release/ {-# INLINE toStreamWithBufferOf #-} toStreamWithBufferOf :: MonadIO m => Int -> Handle -> Stream m Word8 toStreamWithBufferOf chunkSize h = AS.concat $ toChunksWithBufferOf chunkSize h -} -- read child node names from a dir filtering out . and .. -- -- . and .. are an implementation artifact, and should probably not be used in -- user level abstractions. -- -- . does not seem to have any useful purpose. 
-- If we have the path of the dir
-- then we will resolve it to get the inode of the dir so the . entry would be
-- redundant. If we have the inode of the dir to read the dir then it is
-- redundant. Is this for cross check when doing fsck?
--
-- For .. we have the readAncestors API, we should not have this in the
-- readChildren API.

-- XXX exception handling

#if (defined linux_HOST_OS) || (defined darwin_HOST_OS)
-- | Unfold an already opened directory stream into the names of its entries.
--
-- One name is pulled with 'readDirStream' per step; the unfold stops as soon
-- as an empty name is returned. No filtering is done here, so whatever
-- 'readDirStream' reports (including "." and "..") is passed through.
{-# INLINE streamReader #-}
streamReader :: MonadIO m => Unfold m DirStream FilePath
streamReader = Unfold step return

    where

    step strm = do
        -- XXX Use readDirStreamMaybe
        name <- liftIO $ readDirStream strm
        -- An empty name is the end-of-directory marker.
        return $ if null name then Stop else Yield name strm

#elif defined(mingw32_HOST_OS)
-- | Start enumerating the children of a directory.
--
-- The search pattern is the directory path extended with a @*@ wildcard.
-- Passing the bare directory path to @FindFirstFile@ makes it report the
-- directory itself instead of the entries inside it.
openDirStream :: String -> IO (Win32.HANDLE, Win32.FindData)
openDirStream dir = Win32.findFirstFile (dir </> "*")

-- | Release the search handle obtained from 'openDirStream'.
closeDirStream :: (Win32.HANDLE, Win32.FindData) -> IO ()
closeDirStream (h, _) = Win32.findClose h

-- | Unfold an open search handle into the names of the entries it finds.
--
-- 'Win32.findFirstFile' has already loaded the first entry into the
-- 'Win32.FindData' buffer, so that entry is emitted before
-- 'Win32.findNextFile' is called for the first time. No filtering is done
-- here.
{-# INLINE streamReader #-}
streamReader :: MonadIO m => Unfold m (Win32.HANDLE, Win32.FindData) FilePath
streamReader = Unfold step inject

    where

    -- The flag records whether the buffer holds an entry that has not been
    -- emitted yet; initially it holds the one found by findFirstFile.
    inject (h, fdat) = return (True, h, fdat)

    step (pending, h, fdat) = do
        more <-
            if pending
            then return True
            else liftIO $ Win32.findNextFile h fdat
        if more
        then do
            file <- liftIO $ Win32.getFindDataFileName fdat
            return $ Yield file (False, h, fdat)
        else return Stop
#endif

-- | Read a directory emitting a stream with names of the children. Filter out
-- "." and ".." entries.
--
-- The directory is opened when the unfold is started and closed through
-- 'UF.bracketIO' using 'closeDirStream'.
--
-- /Internal/
--
{-# INLINE reader #-}
reader :: (MonadIO m, MonadCatch m) => Unfold m FilePath FilePath
reader =
-- XXX Instead of using bracketIO for each iteration of the loop we should
-- instead yield a buffer of dir entries in each iteration and then use an
-- unfold and concat to flatten those entries. That should improve the
-- performance.
      UF.bracketIO openDirStream closeDirStream streamReader
    & UF.filter (`notElem` [".", ".."])

-- XXX We can use a more general mechanism to filter the contents of a
-- directory. We can just stat each child and pass on the stat information. We
-- can then use that info to do a general filtering.
-- "find" like filters can be
-- created.

-- | Read directories as Left and files as Right. Filter out "." and ".."
-- entries.
--
-- A child is reported as 'Left' when 'Dir.doesDirectoryExist' returns True
-- for its path under the directory being read; every other child is reported
-- as 'Right'. Only the child's name is emitted, not its full path.
--
-- /Internal/
--
{-# INLINE eitherReader #-}
eitherReader :: (MonadIO m, MonadCatch m) =>
    Unfold m FilePath (Either FilePath FilePath)
eitherReader = UF.mapM2 classify reader

    where

    -- @dir@ is the seed of the unfold (the directory being read) and @x@ is
    -- the name of one of its children.
    classify dir x = do
        isDir <- liftIO $ Dir.doesDirectoryExist (dir </> x)
        return $ if isDir then Left x else Right x

-- | Like 'eitherReader' but prefix the names of the files and directories
-- with the path of the directory being read.
--
-- /Internal/
--
{-# INLINE eitherReaderPaths #-}
eitherReaderPaths ::(MonadIO m, MonadCatch m) =>
    Unfold m FilePath (Either FilePath FilePath)
eitherReaderPaths =
    UF.mapM2 (\dir -> return . bimap (dir </>) (dir </>)) eitherReader

-- | Read files only, i.e. the children that 'eitherReader' reports as
-- 'Right'.
--
-- /Internal/
--
{-# INLINE fileReader #-}
fileReader :: (MonadIO m, MonadCatch m) => Unfold m FilePath FilePath
fileReader = fmap unwrapFile $ UF.filter isRight eitherReader

    where

    -- Only Right values pass the filter, so the default is never forced.
    unwrapFile =
        fromRight
            (error "Streamly.Internal.FileSystem.Dir.fileReader: \
                   \unreachable, Left after isRight filter")

-- | Read directories only, i.e. the children that 'eitherReader' reports as
-- 'Left'. Filter out "." and ".." entries.
--
-- /Internal/
--
{-# INLINE dirReader #-}
dirReader :: (MonadIO m, MonadCatch m) => Unfold m FilePath FilePath
dirReader = fmap unwrapDir $ UF.filter isLeft eitherReader

    where

    -- Only Left values pass the filter, so the default is never forced.
    unwrapDir =
        fromLeft
            (error "Streamly.Internal.FileSystem.Dir.dirReader: \
                   \unreachable, Right after isLeft filter")

-- | Read the names of the children of a directory. The "." and ".." entries
-- are not included. This is the stream counterpart of 'reader'.
--
-- /Pre-release/
{-# INLINE read #-}
read :: (MonadIO m, MonadCatch m) => FilePath -> Stream m FilePath
read = S.unfold reader

{-# DEPRECATED toStream "Please use 'read' instead" #-}
{-# INLINE toStream #-}
toStream :: (MonadIO m, MonadCatch m) => String -> Stream m String
toStream = read

-- | Read directories as Left and files as Right. Filter out "." and ".."
-- entries. The output contains the names of the directories and files.
--
-- /Pre-release/
{-# INLINE readEither #-}
readEither :: (MonadIO m, MonadCatch m) =>
    FilePath -> Stream m (Either FilePath FilePath)
readEither = S.unfold eitherReader

-- | Like 'readEither' but prefix the names of the files and directories with
-- the supplied directory path.
{-# INLINE readEitherPaths #-}
readEitherPaths :: (MonadIO m, MonadCatch m) =>
    FilePath -> Stream m (Either FilePath FilePath)
readEitherPaths dir = fmap (bimap (dir </>) (dir </>)) $ readEither dir

{-# DEPRECATED toEither "Please use 'readEither' instead" #-}
{-# INLINE toEither #-}
toEither :: (MonadIO m, MonadCatch m) =>
    FilePath -> Stream m (Either FilePath FilePath)
toEither = readEither

-- | Read files only. This is the stream counterpart of 'fileReader'; the
-- output contains names, not full paths.
--
-- /Internal/
--
{-# INLINE readFiles #-}
readFiles :: (MonadIO m, MonadCatch m) => FilePath -> Stream m FilePath
readFiles = S.unfold fileReader

{-# DEPRECATED toFiles "Please use 'readFiles' instead" #-}
{-# INLINE toFiles #-}
toFiles :: (MonadIO m, MonadCatch m) => FilePath -> Stream m FilePath
toFiles = readFiles

-- | Read directories only. This is the stream counterpart of 'dirReader';
-- the output contains names, not full paths.
--
-- /Internal/
--
{-# INLINE readDirs #-}
readDirs :: (MonadIO m, MonadCatch m) => FilePath -> Stream m FilePath
readDirs = S.unfold dirReader

{-# DEPRECATED toDirs "Please use 'readDirs' instead" #-}
{-# INLINE toDirs #-}
toDirs :: (MonadIO m, MonadCatch m) => String -> Stream m String
toDirs = readDirs

{-
-------------------------------------------------------------------------------
-- Writing
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
-- Array IO (output)
-------------------------------------------------------------------------------

-- | Write an 'Array' to a file handle.
-- -- @since 0.7.0 {-# INLINABLE writeArray #-} writeArray :: Storable a => Handle -> Array a -> IO () writeArray _ arr | A.length arr == 0 = return () writeArray h Array{..} = withForeignPtr aStart $ \p -> hPutBuf h p aLen where aLen = let p = unsafeForeignPtrToPtr aStart in arrEnd `minusPtr` p ------------------------------------------------------------------------------- -- Stream of Arrays IO ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- -- Writing ------------------------------------------------------------------------------- -- | Write a stream of arrays to a handle. -- -- @since 0.7.0 {-# INLINE fromChunks #-} fromChunks :: (MonadIO m, Storable a) => Handle -> Stream m (Array a) -> m () fromChunks h m = S.mapM_ (liftIO . writeArray h) m -- | @fromChunksWithBufferOf bufsize handle stream@ writes a stream of arrays -- to @handle@ after coalescing the adjacent arrays in chunks of @bufsize@. -- The chunk size is only a maximum and the actual writes could be smaller as -- we do not split the arrays to fit exactly to the specified size. -- -- @since 0.7.0 {-# INLINE fromChunksWithBufferOf #-} fromChunksWithBufferOf :: (MonadIO m, Storable a) => Int -> Handle -> Stream m (Array a) -> m () fromChunksWithBufferOf n h xs = fromChunks h $ AS.compact n xs -- | @fromStreamWithBufferOf bufsize handle stream@ writes @stream@ to @handle@ -- in chunks of @bufsize@. A write is performed to the IO device as soon as we -- collect the required input size. -- -- @since 0.7.0 {-# INLINE fromStreamWithBufferOf #-} fromStreamWithBufferOf :: MonadIO m => Int -> Handle -> Stream m Word8 -> m () fromStreamWithBufferOf n h m = fromChunks h $ S.pinnedChunksOf n m -- fromStreamWithBufferOf n h m = fromChunks h $ AS.chunksOf n m -- > write = 'writeWithBufferOf' A.defaultChunkSize -- -- | Write a byte stream to a file handle. 
Accumulates the input in chunks of -- up to 'Streamly.Internal.Data.Array.Type.defaultChunkSize' before writing. -- -- NOTE: This may perform better than the 'write' fold, you can try this if you -- need some extra perf boost. -- -- @since 0.7.0 {-# INLINE fromStream #-} fromStream :: MonadIO m => Handle -> Stream m Word8 -> m () fromStream = fromStreamWithBufferOf defaultChunkSize -- | Write a stream of arrays to a handle. Each array in the stream is written -- to the device as a separate IO request. -- -- @since 0.7.0 {-# INLINE writeChunks #-} writeChunks :: (MonadIO m, Storable a) => Handle -> Fold m (Array a) () writeChunks h = FL.drainBy (liftIO . writeArray h) -- | @writeChunksWithBufferOf bufsize handle@ writes a stream of arrays -- to @handle@ after coalescing the adjacent arrays in chunks of @bufsize@. -- We never split an array, if a single array is bigger than the specified size -- it emitted as it is. Multiple arrays are coalesed as long as the total size -- remains below the specified size. -- -- @since 0.7.0 {-# INLINE writeChunksWithBufferOf #-} writeChunksWithBufferOf :: (MonadIO m, Storable a) => Int -> Handle -> Fold m (Array a) () writeChunksWithBufferOf n h = lpackArraysChunksOf n (writeChunks h) -- GHC buffer size dEFAULT_FD_BUFFER_SIZE=8192 bytes. -- -- XXX test this -- Note that if you use a chunk size less than 8K (GHC's default buffer -- size) then you are advised to use 'NOBuffering' mode on the 'Handle' in case you -- do not want buffering to occur at GHC level as well. Same thing applies to -- writes as well. -- | @writeWithBufferOf reqSize handle@ writes the input stream to @handle@. -- Bytes in the input stream are collected into a buffer until we have a chunk -- of @reqSize@ and then written to the IO device. 
-- -- @since 0.7.0 {-# INLINE writeWithBufferOf #-} writeWithBufferOf :: MonadIO m => Int -> Handle -> Fold m Word8 () writeWithBufferOf n h = FL.groupsOf n (pinnedWriteNUnsafe n) (writeChunks h) -- > write = 'writeWithBufferOf' A.defaultChunkSize -- -- | Write a byte stream to a file handle. Accumulates the input in chunks of -- up to 'Streamly.Internal.Data.Array.Type.defaultChunkSize' before writing -- to the IO device. -- -- @since 0.7.0 {-# INLINE write #-} write :: MonadIO m => Handle -> Fold m Word8 () write = writeWithBufferOf defaultChunkSize -}