{-# LANGUAGE BangPatterns, CPP, RankNTypes #-}

-- |
-- Module      : Data.Text.Internal.Encoding.Fusion
-- Copyright   : (c) Tom Harper 2008-2009,
--               (c) Bryan O'Sullivan 2009,
--               (c) Duncan Coutts 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Fusible 'Stream'-oriented functions for converting between 'Text'
-- and several common encodings.

module Data.Text.Internal.Encoding.Fusion
    (
    -- * Streaming
      streamASCII
    , streamUtf8
    , streamUtf16LE
    , streamUtf16BE
    , streamUtf32LE
    , streamUtf32BE

    -- * Unstreaming
    , unstream

    , module Data.Text.Internal.Encoding.Fusion.Common
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
#endif
import Data.Bits (shiftL, shiftR)
import Data.ByteString.Internal (ByteString(..), mallocByteString, memcpy)
import Data.Text.Internal.Fusion (Step(..), Stream(..))
import Data.Text.Internal.Fusion.Size
import Data.Text.Encoding.Error
import Data.Text.Internal.Encoding.Fusion.Common
import Data.Text.Internal.Unsafe.Char (unsafeChr8, unsafeChr16, unsafeChr32)
import Data.Text.Internal.Unsafe (unsafeWithForeignPtr)
import Data.Word (Word8, Word16, Word32)
import Foreign.ForeignPtr (ForeignPtr)
import Foreign.Storable (pokeByteOff)
import qualified Data.ByteString as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.Text.Internal.Encoding.Utf8 as U8
import qualified Data.Text.Internal.Encoding.Utf16 as U16
import qualified Data.Text.Internal.Encoding.Utf32 as U32
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Data.Text.Internal.ByteStringCompat

streamASCII :: ByteString -> Stream Char
streamASCII :: ByteString -> Stream Char
streamASCII ByteString
bs = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize Int
l)
    where
      l :: Int
l = ByteString -> Int
B.length ByteString
bs
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
l    = forall s a. Step s a
Done
          | Bool
otherwise = forall s a. a -> s -> Step s a
Yield (Word8 -> Char
unsafeChr8 Word8
x1) (Int
iforall a. Num a => a -> a -> a
+Int
1)
          where
            x1 :: Word8
x1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs Int
i
{-# DEPRECATED streamASCII "Do not use this function" #-}
{-# INLINE [0] streamASCII #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using UTF-8
-- encoding.
streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
streamUtf8 OnDecodeError
onErr ByteString
bs = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize Int
l)
    where
      l :: Int
l = ByteString -> Int
B.length ByteString
bs
      next :: Int -> Step Int Char
next Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
l = forall s a. Step s a
Done
          | Word8 -> Bool
U8.validate1 Word8
x1 = forall s a. a -> s -> Step s a
Yield (Word8 -> Char
unsafeChr8 Word8
x1) (Int
iforall a. Num a => a -> a -> a
+Int
1)
          | Int
iforall a. Num a => a -> a -> a
+Int
1 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word8 -> Word8 -> Bool
U8.validate2 Word8
x1 Word8
x2 = forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Char
U8.chr2 Word8
x1 Word8
x2) (Int
iforall a. Num a => a -> a -> a
+Int
2)
          | Int
iforall a. Num a => a -> a -> a
+Int
2 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Bool
U8.validate3 Word8
x1 Word8
x2 Word8
x3 = forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Word8 -> Char
U8.chr3 Word8
x1 Word8
x2 Word8
x3) (Int
iforall a. Num a => a -> a -> a
+Int
3)
          | Int
iforall a. Num a => a -> a -> a
+Int
3 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Word8 -> Bool
U8.validate4 Word8
x1 Word8
x2 Word8
x3 Word8
x4 = forall s a. a -> s -> Step s a
Yield (Word8 -> Word8 -> Word8 -> Word8 -> Char
U8.chr4 Word8
x1 Word8
x2 Word8
x3 Word8
x4) (Int
iforall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf8" String
"UTF-8" OnDecodeError
onErr (forall a. a -> Maybe a
Just Word8
x1) (Int
iforall a. Num a => a -> a -> a
+Int
1)
          where
            x1 :: Word8
x1 = Int -> Word8
idx Int
i
            x2 :: Word8
x2 = Int -> Word8
idx (Int
i forall a. Num a => a -> a -> a
+ Int
1)
            x3 :: Word8
x3 = Int -> Word8
idx (Int
i forall a. Num a => a -> a -> a
+ Int
2)
            x4 :: Word8
x4 = Int -> Word8
idx (Int
i forall a. Num a => a -> a -> a
+ Int
3)
            idx :: Int -> Word8
idx = ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs
{-# INLINE [0] streamUtf8 #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
-- endian UTF-16 encoding.
streamUtf16LE :: OnDecodeError -> ByteString -> Stream Char
streamUtf16LE :: OnDecodeError -> ByteString -> Stream Char
streamUtf16LE OnDecodeError
onErr ByteString
bs = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize (Int
l forall a. Bits a => a -> Int -> a
`shiftR` Int
1))
    where
      l :: Int
l = ByteString -> Int
B.length ByteString
bs
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
l                         = forall s a. Step s a
Done
          | Int
iforall a. Num a => a -> a -> a
+Int
1 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word16 -> Bool
U16.validate1 Word16
x1    = forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr16 Word16
x1) (Int
iforall a. Num a => a -> a -> a
+Int
2)
          | Int
iforall a. Num a => a -> a -> a
+Int
3 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2 = forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2) (Int
iforall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf16LE" String
"UTF-16LE" OnDecodeError
onErr forall a. Maybe a
Nothing (Int
iforall a. Num a => a -> a -> a
+Int
1)
          where
            x1 :: Word16
x1    = Int -> Word16
idx Int
i       forall a. Num a => a -> a -> a
+ (Int -> Word16
idx (Int
i forall a. Num a => a -> a -> a
+ Int
1) forall a. Bits a => a -> Int -> a
`shiftL` Int
8)
            x2 :: Word16
x2    = Int -> Word16
idx (Int
i forall a. Num a => a -> a -> a
+ Int
2) forall a. Num a => a -> a -> a
+ (Int -> Word16
idx (Int
i forall a. Num a => a -> a -> a
+ Int
3) forall a. Bits a => a -> Int -> a
`shiftL` Int
8)
            idx :: Int -> Word16
idx = Word8 -> Word16
word8ToWord16 forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs :: Int -> Word16
{-# INLINE [0] streamUtf16LE #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
-- endian UTF-16 encoding.
streamUtf16BE :: OnDecodeError -> ByteString -> Stream Char
streamUtf16BE :: OnDecodeError -> ByteString -> Stream Char
streamUtf16BE OnDecodeError
onErr ByteString
bs = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize (Int
l forall a. Bits a => a -> Int -> a
`shiftR` Int
1))
    where
      l :: Int
l = ByteString -> Int
B.length ByteString
bs
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
l                         = forall s a. Step s a
Done
          | Int
iforall a. Num a => a -> a -> a
+Int
1 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word16 -> Bool
U16.validate1 Word16
x1    = forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr16 Word16
x1) (Int
iforall a. Num a => a -> a -> a
+Int
2)
          | Int
iforall a. Num a => a -> a -> a
+Int
3 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word16 -> Word16 -> Bool
U16.validate2 Word16
x1 Word16
x2 = forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
x1 Word16
x2) (Int
iforall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf16BE" String
"UTF-16BE" OnDecodeError
onErr forall a. Maybe a
Nothing (Int
iforall a. Num a => a -> a -> a
+Int
1)
          where
            x1 :: Word16
x1    = (Int -> Word16
idx Int
i forall a. Bits a => a -> Int -> a
`shiftL` Int
8)       forall a. Num a => a -> a -> a
+ Int -> Word16
idx (Int
i forall a. Num a => a -> a -> a
+ Int
1)
            x2 :: Word16
x2    = (Int -> Word16
idx (Int
i forall a. Num a => a -> a -> a
+ Int
2) forall a. Bits a => a -> Int -> a
`shiftL` Int
8) forall a. Num a => a -> a -> a
+ Int -> Word16
idx (Int
i forall a. Num a => a -> a -> a
+ Int
3)
            idx :: Int -> Word16
idx = Word8 -> Word16
word8ToWord16 forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs :: Int -> Word16
{-# INLINE [0] streamUtf16BE #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using big
-- endian UTF-32 encoding.
streamUtf32BE :: OnDecodeError -> ByteString -> Stream Char
streamUtf32BE :: OnDecodeError -> ByteString -> Stream Char
streamUtf32BE OnDecodeError
onErr ByteString
bs = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize (Int
l forall a. Bits a => a -> Int -> a
`shiftR` Int
2))
    where
      l :: Int
l = ByteString -> Int
B.length ByteString
bs
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
l                    = forall s a. Step s a
Done
          | Int
iforall a. Num a => a -> a -> a
+Int
3 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word32 -> Bool
U32.validate Word32
x = forall s a. a -> s -> Step s a
Yield (Word32 -> Char
unsafeChr32 Word32
x) (Int
iforall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf32BE" String
"UTF-32BE" OnDecodeError
onErr forall a. Maybe a
Nothing (Int
iforall a. Num a => a -> a -> a
+Int
1)
          where
            x :: Word32
x     = forall a. Bits a => a -> Int -> a
shiftL Word32
x1 Int
24 forall a. Num a => a -> a -> a
+ forall a. Bits a => a -> Int -> a
shiftL Word32
x2 Int
16 forall a. Num a => a -> a -> a
+ forall a. Bits a => a -> Int -> a
shiftL Word32
x3 Int
8 forall a. Num a => a -> a -> a
+ Word32
x4
            x1 :: Word32
x1    = Int -> Word32
idx Int
i
            x2 :: Word32
x2    = Int -> Word32
idx (Int
iforall a. Num a => a -> a -> a
+Int
1)
            x3 :: Word32
x3    = Int -> Word32
idx (Int
iforall a. Num a => a -> a -> a
+Int
2)
            x4 :: Word32
x4    = Int -> Word32
idx (Int
iforall a. Num a => a -> a -> a
+Int
3)
            idx :: Int -> Word32
idx = Word8 -> Word32
word8ToWord32 forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs :: Int -> Word32
{-# INLINE [0] streamUtf32BE #-}

-- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
-- endian UTF-32 encoding.
streamUtf32LE :: OnDecodeError -> ByteString -> Stream Char
streamUtf32LE :: OnDecodeError -> ByteString -> Stream Char
streamUtf32LE OnDecodeError
onErr ByteString
bs = forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
0 (Int -> Size
maxSize (Int
l forall a. Bits a => a -> Int -> a
`shiftR` Int
2))
    where
      l :: Int
l = ByteString -> Int
B.length ByteString
bs
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next Int
i
          | Int
i forall a. Ord a => a -> a -> Bool
>= Int
l                    = forall s a. Step s a
Done
          | Int
iforall a. Num a => a -> a -> a
+Int
3 forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
&& Word32 -> Bool
U32.validate Word32
x = forall s a. a -> s -> Step s a
Yield (Word32 -> Char
unsafeChr32 Word32
x) (Int
iforall a. Num a => a -> a -> a
+Int
4)
          | Bool
otherwise = forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
"streamUtf32LE" String
"UTF-32LE" OnDecodeError
onErr forall a. Maybe a
Nothing (Int
iforall a. Num a => a -> a -> a
+Int
1)
          where
            x :: Word32
x     = forall a. Bits a => a -> Int -> a
shiftL Word32
x4 Int
24 forall a. Num a => a -> a -> a
+ forall a. Bits a => a -> Int -> a
shiftL Word32
x3 Int
16 forall a. Num a => a -> a -> a
+ forall a. Bits a => a -> Int -> a
shiftL Word32
x2 Int
8 forall a. Num a => a -> a -> a
+ Word32
x1
            x1 :: Word32
x1    = Int -> Word32
idx Int
i
            x2 :: Word32
x2    = Int -> Word32
idx forall a b. (a -> b) -> a -> b
$ Int
iforall a. Num a => a -> a -> a
+Int
1
            x3 :: Word32
x3    = Int -> Word32
idx forall a b. (a -> b) -> a -> b
$ Int
iforall a. Num a => a -> a -> a
+Int
2
            x4 :: Word32
x4    = Int -> Word32
idx forall a b. (a -> b) -> a -> b
$ Int
iforall a. Num a => a -> a -> a
+Int
3
            idx :: Int -> Word32
idx = Word8 -> Word32
word8ToWord32 forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int -> Word8
B.unsafeIndex ByteString
bs :: Int -> Word32
{-# INLINE [0] streamUtf32LE #-}

-- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
unstream :: Stream Word8 -> ByteString
unstream :: Stream Word8 -> ByteString
unstream (Stream s -> Step s Word8
next s
s0 Size
len) = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$ do
    let mlen :: Int
mlen = Int -> Size -> Int
upperBound Int
4 Size
len
    forall a. Int -> IO (ForeignPtr a)
mallocByteString Int
mlen forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Int -> s -> ForeignPtr Word8 -> IO ByteString
loop Int
mlen Int
0 s
s0
    where
      loop :: Int -> Int -> s -> ForeignPtr Word8 -> IO ByteString
loop !Int
n !Int
off !s
s ForeignPtr Word8
fp = case s -> Step s Word8
next s
s of
          Step s Word8
Done -> forall {m :: * -> *} {p}.
Monad m =>
ForeignPtr Word8 -> p -> Int -> m ByteString
trimUp ForeignPtr Word8
fp Int
n Int
off
          Skip s
s' -> Int -> Int -> s -> ForeignPtr Word8 -> IO ByteString
loop Int
n Int
off s
s' ForeignPtr Word8
fp
          Yield Word8
x s
s'
              | Int
off forall a. Eq a => a -> a -> Bool
== Int
n -> ForeignPtr Word8 -> Int -> Int -> s -> Word8 -> IO ByteString
realloc ForeignPtr Word8
fp Int
n Int
off s
s' Word8
x
              | Bool
otherwise -> do
            forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
fp forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
p Int
off Word8
x
            Int -> Int -> s -> ForeignPtr Word8 -> IO ByteString
loop Int
n (Int
offforall a. Num a => a -> a -> a
+Int
1) s
s' ForeignPtr Word8
fp
      {-# NOINLINE realloc #-}
      realloc :: ForeignPtr Word8 -> Int -> Int -> s -> Word8 -> IO ByteString
realloc ForeignPtr Word8
fp Int
n Int
off s
s Word8
x = do
        let n' :: Int
n' = Int
nforall a. Num a => a -> a -> a
+Int
n
        ForeignPtr Word8
fp' <- ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
copy0 ForeignPtr Word8
fp Int
n Int
n'
        forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
fp' forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
p Int
off Word8
x
        Int -> Int -> s -> ForeignPtr Word8 -> IO ByteString
loop Int
n' (Int
offforall a. Num a => a -> a -> a
+Int
1) s
s ForeignPtr Word8
fp'
      {-# NOINLINE trimUp #-}
      trimUp :: ForeignPtr Word8 -> p -> Int -> m ByteString
trimUp ForeignPtr Word8
fp p
_ Int
off = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! ForeignPtr Word8 -> Int -> ByteString
mkBS ForeignPtr Word8
fp Int
off
      copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
      copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
copy0 !ForeignPtr Word8
src !Int
srcLen !Int
destLen =
#if defined(ASSERTS)
        assert (srcLen <= destLen) $
#endif
        do
          ForeignPtr Word8
dest <- forall a. Int -> IO (ForeignPtr a)
mallocByteString Int
destLen
          forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
src  forall a b. (a -> b) -> a -> b
$ \Ptr Word8
src'  ->
              forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
dest forall a b. (a -> b) -> a -> b
$ \Ptr Word8
dest' ->
                  Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memcpy Ptr Word8
dest' Ptr Word8
src' Int
srcLen
          forall (m :: * -> *) a. Monad m => a -> m a
return ForeignPtr Word8
dest

decodeError :: forall s. String -> String -> OnDecodeError -> Maybe Word8
            -> s -> Step s Char
decodeError :: forall s.
String
-> String -> OnDecodeError -> Maybe Word8 -> s -> Step s Char
decodeError String
func String
kind OnDecodeError
onErr Maybe Word8
mb s
i =
    case OnDecodeError
onErr String
desc Maybe Word8
mb of
      Maybe Char
Nothing -> forall s a. s -> Step s a
Skip s
i
      Just Char
c  -> forall s a. a -> s -> Step s a
Yield Char
c s
i
    where desc :: String
desc = String
"Data.Text.Internal.Encoding.Fusion." forall a. [a] -> [a] -> [a]
++ String
func forall a. [a] -> [a] -> [a]
++ String
": Invalid " forall a. [a] -> [a] -> [a]
++
                 String
kind forall a. [a] -> [a] -> [a]
++ String
" stream"

word8ToWord16 :: Word8 -> Word16
word8ToWord16 :: Word8 -> Word16
word8ToWord16 = forall a b. (Integral a, Num b) => a -> b
fromIntegral

word8ToWord32 :: Word8 -> Word32
word8ToWord32 :: Word8 -> Word32
word8ToWord32 = forall a b. (Integral a, Num b) => a -> b
fromIntegral