{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
module GHC.IO.Encoding.Failure (
CodingFailureMode(..), codingFailureModeSuffix,
isSurrogate,
recoverDecode, recoverEncode
) where
import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Exception
import GHC.Base
import GHC.Char
import GHC.Word
import GHC.Show
import GHC.Num
import GHC.Real ( fromIntegral )
data CodingFailureMode
= ErrorOnCodingFailure
| IgnoreCodingFailure
| TransliterateCodingFailure
| RoundtripFailure
deriving ( Show
)
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix ErrorOnCodingFailure = ""
codingFailureModeSuffix IgnoreCodingFailure = "//IGNORE"
codingFailureModeSuffix TransliterateCodingFailure = "//TRANSLIT"
codingFailureModeSuffix RoundtripFailure = "//ROUNDTRIP"
unrepresentableChar :: Char
unrepresentableChar :: Char
unrepresentableChar = '\xFFFD'
{-# INLINE isSurrogate #-}
isSurrogate :: Char -> Bool
isSurrogate :: Char -> Bool
isSurrogate c :: Char
c = (0xD800 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xDBFF)
Bool -> Bool -> Bool
|| (0xDC00 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0xDFFF)
where x :: Int
x = Char -> Int
ord Char
c
{-# INLINE escapeToRoundtripCharacterSurrogate #-}
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate b :: Word8
b
| Word8
b Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< 128 = Int -> Char
chr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b)
| Bool
otherwise = Int -> Char
chr (0xDC00 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b)
{-# INLINE unescapeRoundtripCharacterSurrogate #-}
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate c :: Char
c
| 0xDC80 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0xDD00 = Word8 -> Maybe Word8
forall a. a -> Maybe a
Just (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x)
| Bool
otherwise = Maybe Word8
forall a. Maybe a
Nothing
where x :: Int
x = Char -> Int
ord Char
c
recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
-> IO (Buffer Word8, Buffer Char)
recoverDecode :: CodingFailureMode
-> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)
recoverDecode cfm :: CodingFailureMode
cfm input :: Buffer Word8
input@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Word8
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
_ }
output :: Buffer Char
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Char
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_, bufR :: forall e. Buffer e -> Int
bufR=Int
ow } = do
case CodingFailureMode
cfm of
ErrorOnCodingFailure -> IO (Buffer Word8, Buffer Char)
forall a. IO a
ioe_decodingError
IgnoreCodingFailure -> (Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL :: Int
bufL=Int
irInt -> Int -> Int
forall a. Num a => a -> a -> a
+1 }, Buffer Char
output)
TransliterateCodingFailure -> do
Int
ow' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
oraw Int
ow Char
unrepresentableChar
(Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL :: Int
bufL=Int
irInt -> Int -> Int
forall a. Num a => a -> a -> a
+1 }, Buffer Char
output { bufR :: Int
bufR=Int
ow' })
RoundtripFailure -> do
Word8
b <- RawBuffer Word8 -> Int -> IO Word8
readWord8Buf RawBuffer Word8
iraw Int
ir
Int
ow' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
oraw Int
ow (Word8 -> Char
escapeToRoundtripCharacterSurrogate Word8
b)
(Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL :: Int
bufL=Int
irInt -> Int -> Int
forall a. Num a => a -> a -> a
+1 }, Buffer Char
output { bufR :: Int
bufR=Int
ow' })
recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
-> IO (Buffer Char, Buffer Word8)
recoverEncode :: CodingFailureMode
-> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)
recoverEncode cfm :: CodingFailureMode
cfm input :: Buffer Char
input@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Char
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
_ }
output :: Buffer Word8
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Word8
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_, bufR :: forall e. Buffer e -> Int
bufR=Int
ow } = do
(c :: Char
c,ir' :: Int
ir') <- RawBuffer Char -> Int -> IO (Char, Int)
readCharBuf RawBuffer Char
iraw Int
ir
case CodingFailureMode
cfm of
IgnoreCodingFailure -> (Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL :: Int
bufL=Int
ir' }, Buffer Word8
output)
TransliterateCodingFailure -> do
if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '?'
then (Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL :: Int
bufL=Int
ir' }, Buffer Word8
output)
else do
Int
_ir' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
iraw Int
ir '?'
(Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input, Buffer Word8
output)
RoundtripFailure | Just x :: Word8
x <- Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate Char
c -> do
RawBuffer Word8 -> Int -> Word8 -> IO ()
writeWord8Buf RawBuffer Word8
oraw Int
ow Word8
x
(Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL :: Int
bufL=Int
ir' }, Buffer Word8
output { bufR :: Int
bufR=Int
owInt -> Int -> Int
forall a. Num a => a -> a -> a
+1 })
_ -> IO (Buffer Char, Buffer Word8)
forall a. IO a
ioe_encodingError
ioe_decodingError :: IO a
ioe_decodingError :: IO a
ioe_decodingError = IOException -> IO a
forall a. IOException -> IO a
ioException
(Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
InvalidArgument "recoverDecode"
"invalid byte sequence" Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)
ioe_encodingError :: IO a
ioe_encodingError :: IO a
ioe_encodingError = IOException -> IO a
forall a. IOException -> IO a
ioException
(Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
InvalidArgument "recoverEncode"
"invalid character" Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)