{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
( UTF16(..)
, UTF16_Invalid(..)
) where
import GHC.Prim
import GHC.Word
import GHC.Types
import qualified Prelude
import Basement.Compat.Base
import Basement.Compat.Primitive
import Basement.IntegralConv
import Basement.Bits
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.Numerical.Subtractive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder
import Basement.String.Encoding.Encoding
data UTF16_Invalid
= InvalidContinuation
| InvalidUnicode Char
deriving (Int -> UTF16_Invalid -> ShowS
[UTF16_Invalid] -> ShowS
UTF16_Invalid -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF16_Invalid] -> ShowS
$cshowList :: [UTF16_Invalid] -> ShowS
show :: UTF16_Invalid -> String
$cshow :: UTF16_Invalid -> String
showsPrec :: Int -> UTF16_Invalid -> ShowS
$cshowsPrec :: Int -> UTF16_Invalid -> ShowS
Show, UTF16_Invalid -> UTF16_Invalid -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
== :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c== :: UTF16_Invalid -> UTF16_Invalid -> Bool
Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
type Unit UTF16 = Word16
type Error UTF16 = UTF16_Invalid
encodingNext :: UTF16
-> (Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
encodingNext UTF16
_ = (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next
encodingWrite :: forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
UTF16
-> Char
-> Builder
(UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
encodingWrite UTF16
_ = forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write
next :: (Offset Word16 -> Word16)
-> Offset Word16
-> Either UTF16_Invalid (Char, Offset Word16)
next :: (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next Offset Word16 -> Word16
getter Offset Word16
off
| Word16
h forall a. Ord a => a -> a -> Bool
< Word16
0xd800 = forall a b. b -> Either a b
Right (Word16 -> Char
toChar16 Word16
h, Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
1)
| Word16
h forall a. Ord a => a -> a -> Bool
>= Word16
0xe000 = forall a b. b -> Either a b
Right (Word16 -> Char
toChar16 Word16
h, Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
1)
| Bool
otherwise = Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
where
h :: Word16
!h :: Word16
h = Offset Word16 -> Word16
getter Offset Word16
off
to32 :: Word16 -> Word32
to32 :: Word16 -> Word32
to32 (W16# Word16#
w) = Word32# -> Word32
W32# (Word16# -> Word32#
word16ToWord32# Word16#
w)
toChar16 :: Word16 -> Char
toChar16 :: Word16 -> Char
toChar16 (W16# Word16#
w) = Char# -> Char
C# (Word32# -> Char#
word32ToChar# (Word16# -> Word32#
word16ToWord32# Word16#
w))
nextContinuation :: Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
| Word16
cont forall a. Ord a => a -> a -> Bool
>= Word16
0xdc00 Bool -> Bool -> Bool
&& Word16
cont forall a. Ord a => a -> a -> Bool
< Word16
0xe00 =
let !(W32# Word32#
w) = ((Word16 -> Word32
to32 Word16
h forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff) forall bits. BitOps bits => bits -> CountOf Bool -> bits
.<<. CountOf Bool
10) forall bits. BitOps bits => bits -> bits -> bits
.|. (Word16 -> Word32
to32 Word16
cont forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff)
in forall a b. b -> Either a b
Right (Char# -> Char
C# (Word32# -> Char#
word32ToChar# Word32#
w), Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
2)
| Bool
otherwise = forall a b. a -> Either a b
Left UTF16_Invalid
InvalidContinuation
where
cont :: Word16
!cont :: Word16
cont = Offset Word16 -> Word16
getter forall a b. (a -> b) -> a -> b
$ Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
1
write :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write :: forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write Char
c
| Char
c forall a. Ord a => a -> a -> Bool
< forall a. Enum a => Int -> a
toEnum Int
0xd800 = forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
| Char
c forall a. Ord a => a -> a -> Bool
> forall a. Enum a => Int -> a
toEnum Int
0x10000 = let (Word16
w1, Word16
w2) = Char -> (Word16, Word16)
wHigh Char
c in forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w2
| Char
c forall a. Ord a => a -> a -> Bool
> forall a. Enum a => Int -> a
toEnum Int
0x10ffff = forall a e. Exception e => e -> a
throw forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
| Char
c forall a. Ord a => a -> a -> Bool
>= forall a. Enum a => Int -> a
toEnum Int
0xe000 = forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
| Bool
otherwise = forall a e. Exception e => e -> a
throw forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
where
w16 :: Char -> Word16
w16 :: Char -> Word16
w16 (C# Char#
ch) = Word16# -> Word16
W16# (Word# -> Word16#
wordToWord16# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch)))
to16 :: Word32 -> Word16
to16 :: Word32 -> Word16
to16 = forall a b. (Integral a, Num b) => a -> b
Prelude.fromIntegral
wHigh :: Char -> (Word16, Word16)
wHigh :: Char -> (Word16, Word16)
wHigh (C# Char#
ch) =
let v :: Difference Word32
v = Word32# -> Word32
W32# (Char# -> Word32#
charToWord32# Char#
ch) forall a. Subtractive a => a -> a -> Difference a
- Word32
0x10000
in (Word16
0xdc00 forall bits. BitOps bits => bits -> bits -> bits
.|. Word32 -> Word16
to16 (Difference Word32
v forall bits. BitOps bits => bits -> CountOf Bool -> bits
.>>. CountOf Bool
10), Word16
0xd800 forall bits. BitOps bits => bits -> bits -> bits
.|. Word32 -> Word16
to16 (Difference Word32
v forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff))