{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE GADTs #-}
module Data.Warc
( Warc(..)
, Record(..)
, parseWarc
, iterRecords
, produceRecords
, encodeRecord
, module Data.Warc.Header
) where
import Data.Char (ord)
import Pipes hiding (each)
import qualified Pipes.ByteString as PBS
import Control.Lens
import qualified Pipes.Attoparsec as PA
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BB
import Data.ByteString (ByteString)
import Control.Monad (join)
import Control.Monad.Trans.Free
import Control.Monad.Trans.State.Strict
import Data.Warc.Header
data Record m r = Record { :: RecordHeader
, Record m r -> Producer ByteString m r
recContent :: Producer BS.ByteString m r
}
instance Monad m => Functor (Record m) where
fmap :: (a -> b) -> Record m a -> Record m b
fmap a -> b
f (Record RecordHeader
hdr Producer ByteString m a
r) = RecordHeader -> Producer ByteString m b -> Record m b
forall (m :: * -> *) r.
RecordHeader -> Producer ByteString m r -> Record m r
Record RecordHeader
hdr ((a -> b) -> Producer ByteString m a -> Producer ByteString m b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a -> b
f Producer ByteString m a
r)
type Warc m a = FreeT (Record m) m (Producer BS.ByteString m a)
parseWarc :: (Functor m, Monad m)
=> Producer ByteString m a
-> Warc m a
parseWarc :: Producer ByteString m a -> Warc m a
parseWarc = Producer ByteString m a -> Warc m a
forall (m :: * -> *) x.
Monad m =>
Producer ByteString m x
-> FreeT (Record m) m (Producer ByteString m x)
loop
where
loop :: Producer ByteString m x
-> FreeT (Record m) m (Producer ByteString m x)
loop Producer ByteString m x
upstream = m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
-> FreeT (Record m) m (Producer ByteString m x)
forall (f :: * -> *) (m :: * -> *) a.
m (FreeF f a (FreeT f m a)) -> FreeT f m a
FreeT (m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
-> FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
-> FreeT (Record m) m (Producer ByteString m x)
forall a b. (a -> b) -> a -> b
$ do
(Maybe (Either ParsingError RecordHeader)
hdr, Producer ByteString m x
rest) <- StateT
(Producer ByteString m x)
m
(Maybe (Either ParsingError RecordHeader))
-> Producer ByteString m x
-> m (Maybe (Either ParsingError RecordHeader),
Producer ByteString m x)
forall s (m :: * -> *) a. StateT s m a -> s -> m (a, s)
runStateT (Parser ByteString RecordHeader
-> Parser ByteString m (Maybe (Either ParsingError RecordHeader))
forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
PA.parse Parser ByteString RecordHeader
header) Producer ByteString m x
upstream
Maybe (Either ParsingError RecordHeader)
-> Producer ByteString m x
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
go Maybe (Either ParsingError RecordHeader)
hdr Producer ByteString m x
rest
go :: Maybe (Either ParsingError RecordHeader)
-> Producer ByteString m x
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
go Maybe (Either ParsingError RecordHeader)
mhdr Producer ByteString m x
rest
| Maybe (Either ParsingError RecordHeader)
Nothing <- Maybe (Either ParsingError RecordHeader)
mhdr = FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
forall (m :: * -> *) a. Monad m => a -> m a
return (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))))
-> FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
forall a b. (a -> b) -> a -> b
$ Producer ByteString m x
-> FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
forall (f :: * -> *) a b. a -> FreeF f a b
Pure Producer ByteString m x
rest
| Just (Left ParsingError
err) <- Maybe (Either ParsingError RecordHeader)
mhdr = [Char]
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
forall a. HasCallStack => [Char] -> a
error ([Char]
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))))
-> [Char]
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
forall a b. (a -> b) -> a -> b
$ ParsingError -> [Char]
forall a. Show a => a -> [Char]
show ParsingError
err
| Just (Right RecordHeader
hdr) <- Maybe (Either ParsingError RecordHeader)
mhdr
, Just (Right Integer
len) <- RecordHeader -> Field Integer -> Maybe (Either [Char] Integer)
forall a. RecordHeader -> Field a -> Maybe (Either [Char] a)
lookupField RecordHeader
hdr Field Integer
contentLength = do
let produceBody :: Producer ByteString m x
-> Proxy X () () ByteString m (Producer ByteString m x)
produceBody = (Producer ByteString m x -> Producer ByteString m x)
-> Proxy X () () ByteString m (Producer ByteString m x)
-> Proxy X () () ByteString m (Producer ByteString m x)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Producer ByteString m x -> Producer ByteString m x
consumeWhitespace (Proxy X () () ByteString m (Producer ByteString m x)
-> Proxy X () () ByteString m (Producer ByteString m x))
-> (Producer ByteString m x
-> Proxy X () () ByteString m (Producer ByteString m x))
-> Producer ByteString m x
-> Proxy X () () ByteString m (Producer ByteString m x)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Getting
(Proxy X () () ByteString m (Producer ByteString m x))
(Producer ByteString m x)
(Proxy X () () ByteString m (Producer ByteString m x))
-> Producer ByteString m x
-> Proxy X () () ByteString m (Producer ByteString m x)
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view (Integer
-> Lens'
(Producer ByteString m x)
(Proxy X () () ByteString m (Producer ByteString m x))
forall (m :: * -> *) n x.
(Monad m, Integral n) =>
n
-> Lens'
(Producer ByteString m x)
(Producer ByteString m (Producer ByteString m x))
PBS.splitAt Integer
len)
consumeWhitespace :: Producer ByteString m x -> Producer ByteString m x
consumeWhitespace = (Word8 -> Bool)
-> Producer ByteString m x -> Producer ByteString m x
forall (m :: * -> *) r.
Monad m =>
(Word8 -> Bool)
-> Producer ByteString m r -> Producer ByteString m r
PBS.dropWhile Word8 -> Bool
isEOL
isEOL :: Word8 -> Bool
isEOL Word8
c = Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'\r' Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'\n'
ord8 :: Char -> Word8
ord8 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord
FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
forall (m :: * -> *) a. Monad m => a -> m a
return (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))))
-> FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
-> m (FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
forall a b. (a -> b) -> a -> b
$ Record m (FreeT (Record m) m (Producer ByteString m x))
-> FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
forall (f :: * -> *) a b. f b -> FreeF f a b
Free (Record m (FreeT (Record m) m (Producer ByteString m x))
-> FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x)))
-> Record m (FreeT (Record m) m (Producer ByteString m x))
-> FreeF
(Record m)
(Producer ByteString m x)
(FreeT (Record m) m (Producer ByteString m x))
forall a b. (a -> b) -> a -> b
$ RecordHeader
-> Producer
ByteString m (FreeT (Record m) m (Producer ByteString m x))
-> Record m (FreeT (Record m) m (Producer ByteString m x))
forall (m :: * -> *) r.
RecordHeader -> Producer ByteString m r -> Record m r
Record RecordHeader
hdr (Producer
ByteString m (FreeT (Record m) m (Producer ByteString m x))
-> Record m (FreeT (Record m) m (Producer ByteString m x)))
-> Producer
ByteString m (FreeT (Record m) m (Producer ByteString m x))
-> Record m (FreeT (Record m) m (Producer ByteString m x))
forall a b. (a -> b) -> a -> b
$ (Producer ByteString m x
-> FreeT (Record m) m (Producer ByteString m x))
-> Proxy X () () ByteString m (Producer ByteString m x)
-> Producer
ByteString m (FreeT (Record m) m (Producer ByteString m x))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Producer ByteString m x
-> FreeT (Record m) m (Producer ByteString m x)
loop (Proxy X () () ByteString m (Producer ByteString m x)
-> Producer
ByteString m (FreeT (Record m) m (Producer ByteString m x)))
-> Proxy X () () ByteString m (Producer ByteString m x)
-> Producer
ByteString m (FreeT (Record m) m (Producer ByteString m x))
forall a b. (a -> b) -> a -> b
$ Producer ByteString m x
-> Proxy X () () ByteString m (Producer ByteString m x)
produceBody Producer ByteString m x
rest
iterRecords :: forall m a. Monad m
=> (forall b. Record m b -> m b)
-> Warc m a
-> m (Producer BS.ByteString m a)
iterRecords :: (forall b. Record m b -> m b)
-> Warc m a -> m (Producer ByteString m a)
iterRecords forall b. Record m b -> m b
f Warc m a
warc = (Record m (m (Producer ByteString m a))
-> m (Producer ByteString m a))
-> Warc m a -> m (Producer ByteString m a)
forall (f :: * -> *) (m :: * -> *) a.
(Functor f, Monad m) =>
(f (m a) -> m a) -> FreeT f m a -> m a
iterT Record m (m (Producer ByteString m a))
-> m (Producer ByteString m a)
iter Warc m a
warc
where
iter :: Record m (m (Producer BS.ByteString m a))
-> m (Producer BS.ByteString m a)
iter :: Record m (m (Producer ByteString m a))
-> m (Producer ByteString m a)
iter Record m (m (Producer ByteString m a))
r = m (m (Producer ByteString m a)) -> m (Producer ByteString m a)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (m (m (Producer ByteString m a)) -> m (Producer ByteString m a))
-> m (m (Producer ByteString m a)) -> m (Producer ByteString m a)
forall a b. (a -> b) -> a -> b
$ Record m (m (Producer ByteString m a))
-> m (m (Producer ByteString m a))
forall b. Record m b -> m b
f Record m (m (Producer ByteString m a))
r
produceRecords :: forall m o a. Monad m
=> (forall b. RecordHeader -> Producer BS.ByteString m b
-> Producer o m b)
-> Warc m a
-> Producer o m (Producer BS.ByteString m a)
produceRecords :: (forall b.
RecordHeader -> Producer ByteString m b -> Producer o m b)
-> Warc m a -> Producer o m (Producer ByteString m a)
produceRecords forall b. RecordHeader -> Producer ByteString m b -> Producer o m b
f Warc m a
warc = (Record m (Producer o m (Producer ByteString m a))
-> Producer o m (Producer ByteString m a))
-> Warc m a -> Producer o m (Producer ByteString m a)
forall (f :: * -> *) (m :: * -> *) (t :: (* -> *) -> * -> *) a.
(Functor f, Monad m, MonadTrans t, Monad (t m)) =>
(f (t m a) -> t m a) -> FreeT f m a -> t m a
iterTM Record m (Producer o m (Producer ByteString m a))
-> Producer o m (Producer ByteString m a)
iter Warc m a
warc
where
iter :: Record m (Producer o m (Producer BS.ByteString m a))
-> Producer o m (Producer BS.ByteString m a)
iter :: Record m (Producer o m (Producer ByteString m a))
-> Producer o m (Producer ByteString m a)
iter (Record RecordHeader
hdr Producer ByteString m (Producer o m (Producer ByteString m a))
body) = Proxy X () () o m (Producer o m (Producer ByteString m a))
-> Producer o m (Producer ByteString m a)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Proxy X () () o m (Producer o m (Producer ByteString m a))
-> Producer o m (Producer ByteString m a))
-> Proxy X () () o m (Producer o m (Producer ByteString m a))
-> Producer o m (Producer ByteString m a)
forall a b. (a -> b) -> a -> b
$ RecordHeader
-> Producer ByteString m (Producer o m (Producer ByteString m a))
-> Proxy X () () o m (Producer o m (Producer ByteString m a))
forall b. RecordHeader -> Producer ByteString m b -> Producer o m b
f RecordHeader
hdr Producer ByteString m (Producer o m (Producer ByteString m a))
body
encodeRecord :: Monad m => Record m a -> Producer BS.ByteString m a
encodeRecord :: Record m a -> Producer ByteString m a
encodeRecord (Record RecordHeader
hdr Producer ByteString m a
content) = do
ByteString -> Producer' ByteString m ()
forall (m :: * -> *).
Monad m =>
ByteString -> Producer' ByteString m ()
PBS.fromLazy (ByteString -> Producer' ByteString m ())
-> ByteString -> Producer' ByteString m ()
forall a b. (a -> b) -> a -> b
$ Builder -> ByteString
BB.toLazyByteString (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$ RecordHeader -> Builder
encodeHeader RecordHeader
hdr
Producer ByteString m a
content