{-# OPTIONS_HADDOCK hide #-}
{-# LANGUAGE FlexibleContexts #-}
module Streamly.Internal.Data.Unicode.Stream
(
decodeLatin1
, decodeUtf8
, decodeUtf8Lax
, D.DecodeError(..)
, D.DecodeState
, D.CodePoint
, decodeUtf8Either
, resumeDecodeUtf8Either
, decodeUtf8Arrays
, decodeUtf8ArraysLenient
, encodeLatin1
, encodeLatin1Lax
, encodeUtf8
, stripStart
, lines
, words
, unlines
, unwords
)
where
import Control.Monad.IO.Class (MonadIO)
import Data.Char (ord)
import Data.Word (Word8)
import GHC.Base (unsafeChr)
import Streamly (IsStream)
import Prelude hiding (String, lines, words, unlines, unwords)
import Streamly.Data.Fold (Fold)
import Streamly.Memory.Array (Array)
import Streamly.Internal.Data.Unfold (Unfold)
import qualified Streamly.Internal.Prelude as S
import qualified Streamly.Streams.StreamD as D
-- | Decode a stream of bytes as Latin-1: every byte becomes the 'Char' whose
-- code point equals the byte value.
--
-- A 'Word8' always lies in the range 0-255, which is a valid code point, so
-- the unchecked 'unsafeChr' conversion cannot build an invalid 'Char' here.
{-# INLINE decodeLatin1 #-}
decodeLatin1 :: (IsStream t, Monad m) => t m Word8 -> t m Char
decodeLatin1 = S.map byteToChar
  where
    -- Widen the byte to an 'Int' code point, then reinterpret it as a 'Char'.
    byteToChar :: Word8 -> Char
    byteToChar byte = unsafeChr (fromIntegral byte)
-- | Encode a stream of characters as Latin-1 bytes: every character is mapped
-- to the byte equal to its code point.
--
-- Only code points in the range 0-255 are representable. For a character
-- above that range the conversion calls 'error' with a message that contains
-- the offending code point. See 'encodeLatin1Lax' for a variant that never
-- raises an error.
{-# INLINE encodeLatin1 #-}
encodeLatin1 :: (IsStream t, Monad m) => t m Char -> t m Word8
encodeLatin1 = S.map convert
  where
    convert c
        | codepoint > 255 =
            -- Report the module this function is actually defined in, so the
            -- failure can be traced back to its origin.
            error $ "Streamly.Internal.Data.Unicode.Stream.encodeLatin1: "
                ++ "invalid input char codepoint " ++ show codepoint
        | otherwise = fromIntegral codepoint
      where
        codepoint = ord c
-- | Like 'encodeLatin1', but never fails: every character is mapped to a byte
-- by narrowing its code point to a 'Word8'.
--
-- The narrowing is done with 'fromIntegral', so a code point above 255 is
-- silently reduced modulo 256 and yields a byte that does not represent the
-- original character.
{-# INLINE encodeLatin1Lax #-}
encodeLatin1Lax :: (IsStream t, Monad m) => t m Char -> t m Word8
encodeLatin1Lax = S.map lowByte
  where
    lowByte :: Char -> Word8
    lowByte ch = fromIntegral (ord ch)
-- | Decode a stream of UTF-8 encoded bytes into a stream of characters.
--
-- The input is converted to the @StreamD@ representation, decoded with
-- @D.decodeUtf8@ and converted back.
--
-- NOTE(review): what happens on malformed input is decided entirely by
-- @D.decodeUtf8@ and is not visible from this module — confirm there.
{-# INLINE decodeUtf8 #-}
decodeUtf8 :: (Monad m, IsStream t) => t m Word8 -> t m Char
decodeUtf8 =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \bytes -> D.fromStreamD (D.decodeUtf8 (D.toStreamD bytes))
-- | Decode a stream of byte arrays, taken together as UTF-8 encoded data,
-- into a stream of characters.
--
-- The input is converted to the @StreamD@ representation, decoded with
-- @D.decodeUtf8Arrays@ and converted back.
--
-- NOTE(review): handling of malformed input, and of code points split across
-- two arrays, is decided by @D.decodeUtf8Arrays@ — confirm there.
{-# INLINE decodeUtf8Arrays #-}
decodeUtf8Arrays :: (MonadIO m, IsStream t) => t m (Array Word8) -> t m Char
decodeUtf8Arrays =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \chunks -> D.fromStreamD (D.decodeUtf8Arrays (D.toStreamD chunks))
-- | Lenient counterpart of 'decodeUtf8': decode a stream of UTF-8 encoded
-- bytes into a stream of characters using @D.decodeUtf8Lenient@.
--
-- NOTE(review): the exact treatment of malformed input (presumably some form
-- of substitution rather than failure) is defined by @D.decodeUtf8Lenient@
-- and is not visible from this module — confirm there.
{-# INLINE decodeUtf8Lax #-}
decodeUtf8Lax :: (Monad m, IsStream t) => t m Word8 -> t m Char
decodeUtf8Lax =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \bytes -> D.fromStreamD (D.decodeUtf8Lenient (D.toStreamD bytes))
-- | Decode a stream of UTF-8 encoded bytes, producing for each decoding step
-- either a 'D.DecodeError' ('Left') or a decoded 'Char' ('Right').
--
-- The input is converted to the @StreamD@ representation, processed with
-- @D.decodeUtf8Either@ and converted back.
--
-- NOTE(review): which inputs give rise to a 'Left', and whether decoding
-- continues afterwards, is defined by @D.decodeUtf8Either@ — confirm there.
{-# INLINE decodeUtf8Either #-}
decodeUtf8Either
    :: (Monad m, IsStream t)
    => t m Word8
    -> t m (Either D.DecodeError Char)
decodeUtf8Either =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \bytes -> D.fromStreamD (D.decodeUtf8Either (D.toStreamD bytes))
-- | Variant of 'decodeUtf8Either' that is additionally given a
-- 'D.DecodeState' and a 'D.CodePoint', both of which are handed unchanged to
-- @D.resumeDecodeUtf8Either@ together with the byte stream.
--
-- NOTE(review): presumably these two values describe a partially decoded
-- code point from an earlier run, so that decoding can pick up where it left
-- off — confirm against @D.resumeDecodeUtf8Either@.
{-# INLINE resumeDecodeUtf8Either #-}
resumeDecodeUtf8Either
    :: (Monad m, IsStream t)
    => D.DecodeState
    -> D.CodePoint
    -> t m Word8
    -> t m (Either D.DecodeError Char)
resumeDecodeUtf8Either st cp =
    -- NOTE: the stream argument stays off the left-hand side (as in the
    -- original), so INLINE applies once the first two arguments are given.
    \bytes ->
        D.fromStreamD $ D.resumeDecodeUtf8Either st cp $ D.toStreamD bytes
-- | Lenient counterpart of 'decodeUtf8Arrays': decode a stream of byte
-- arrays, taken together as UTF-8 encoded data, into a stream of characters
-- using @D.decodeUtf8ArraysLenient@.
--
-- NOTE(review): the exact treatment of malformed input is defined by
-- @D.decodeUtf8ArraysLenient@ and is not visible from this module — confirm
-- there.
{-# INLINE decodeUtf8ArraysLenient #-}
decodeUtf8ArraysLenient
    :: (MonadIO m, IsStream t)
    => t m (Array Word8)
    -> t m Char
decodeUtf8ArraysLenient =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \chunks -> D.fromStreamD (D.decodeUtf8ArraysLenient (D.toStreamD chunks))
-- | Encode a stream of characters into a stream of UTF-8 bytes.
--
-- The input is converted to the @StreamD@ representation, encoded with
-- @D.encodeUtf8@ and converted back.
{-# INLINE encodeUtf8 #-}
encodeUtf8 :: (Monad m, IsStream t) => t m Char -> t m Word8
encodeUtf8 =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \chars -> D.fromStreamD (D.encodeUtf8 (D.toStreamD chars))
-- | Remove leading whitespace from a character stream.
--
-- Implemented as @S.dropWhile@ with this module's own 'isSpace' as the
-- predicate, so "whitespace" means exactly what that predicate accepts.
{-# INLINE stripStart #-}
stripStart :: (Monad m, IsStream t) => t m Char -> t m Char
stripStart =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \chars -> S.dropWhile isSpace chars
-- | Split a character stream into lines and reduce every line with the
-- supplied 'Fold', streaming one fold result per line.
--
-- Splitting is delegated to @S.splitOnSuffix@ with a predicate that matches
-- only the newline character @\'\\n\'@, i.e. the newline is treated as a line
-- terminator (suffix).
--
-- NOTE(review): whether the terminator is passed to the fold, and how a final
-- unterminated line is treated, is defined by @S.splitOnSuffix@ — confirm
-- there.
{-# INLINE lines #-}
lines :: (Monad m, IsStream t) => Fold m Char b -> t m Char -> t m b
lines = S.splitOnSuffix isNewline
  where
    isNewline :: Char -> Bool
    isNewline ch = ch == '\n'
-- | Binding to the C function @u_iswspace@, imported with the @unsafe@
-- calling convention and typed as a pure @Int -> Int@ function.
--
-- 'isSpace' passes it the code point of a character and interprets any
-- non-zero result as "is whitespace".
--
-- NOTE(review): the symbol is presumably the one shipped in the C sources of
-- @base@ — confirm it is provided by every GHC version this module supports.
foreign import ccall unsafe "u_iswspace"
iswspace :: Int -> Int
-- | Whitespace predicate used by 'stripStart' and 'words'.
--
-- Code points up to @0x377@ are classified inline: the space (@0x20@), the
-- control characters @0x09@-@0x0D@ (tab through carriage return) and the
-- no-break space (@0xA0@) count as whitespace. Anything above @0x377@ is
-- delegated to the foreign function 'iswspace'.
--
-- NOTE(review): this looks like a local copy of @isSpace@ from @base@, kept
-- here presumably so that it can carry an INLINE pragma — confirm.
{-# INLINE isSpace #-}
isSpace :: Char -> Bool
isSpace ch =
    if code > 0x377
    then iswspace (ord ch) /= 0
    else
        -- @code - 0x9 <= 4@ is a single-comparison range check for
        -- 0x09..0x0D: 'Word' is unsigned, so anything below 0x9 wraps around
        -- to a huge value and fails the test.
        code == 32 || code - 0x9 <= 4 || code == 0xa0
  where
    code :: Word
    code = fromIntegral (ord ch)
-- | Split a character stream into words and reduce every word with the
-- supplied 'Fold', streaming one fold result per word.
--
-- Splitting is delegated to @S.wordsBy@ with this module's 'isSpace' as the
-- separator predicate.
--
-- NOTE(review): how runs of consecutive separators and leading or trailing
-- separators are treated is defined by @S.wordsBy@ — confirm there.
{-# INLINE words #-}
words :: (Monad m, IsStream t) => Fold m Char b -> t m Char -> t m b
words =
    -- NOTE: no arguments on the left of '=', so INLINE still applies to
    -- unapplied uses.
    \wordFold -> S.wordsBy isSpace wordFold
-- | Expand every element of the input stream into characters using the
-- supplied 'Unfold' and join the pieces into a single character stream, with
-- the newline character @\'\\n\'@ passed to @S.interposeSuffix@ as the suffix
-- to insert.
{-# INLINE unlines #-}
unlines :: (MonadIO m, IsStream t) => Unfold m a Char -> t m a -> t m Char
unlines = S.interposeSuffix lineTerminator
  where
    lineTerminator :: Char
    lineTerminator = '\n'
-- | Expand every element of the input stream into characters using the
-- supplied 'Unfold' and join the pieces into a single character stream, with
-- a single space character passed to @S.interpose@ as the separator to
-- insert.
{-# INLINE unwords #-}
unwords :: (MonadIO m, IsStream t) => Unfold m a Char -> t m a -> t m Char
unwords = S.interpose wordSeparator
  where
    wordSeparator :: Char
    wordSeparator = ' '