{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
( UTF16(..)
, UTF16_Invalid(..)
) where
import GHC.Prim
import GHC.Word
import GHC.Types
import Data.Bits
import qualified Prelude
import Basement.Compat.Base
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder
import Basement.String.Encoding.Encoding
data UTF16_Invalid
= InvalidContinuation
| InvalidUnicode Char
deriving (Int -> UTF16_Invalid -> ShowS
[UTF16_Invalid] -> ShowS
UTF16_Invalid -> String
(Int -> UTF16_Invalid -> ShowS)
-> (UTF16_Invalid -> String)
-> ([UTF16_Invalid] -> ShowS)
-> Show UTF16_Invalid
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF16_Invalid] -> ShowS
$cshowList :: [UTF16_Invalid] -> ShowS
show :: UTF16_Invalid -> String
$cshow :: UTF16_Invalid -> String
showsPrec :: Int -> UTF16_Invalid -> ShowS
$cshowsPrec :: Int -> UTF16_Invalid -> ShowS
Show, UTF16_Invalid -> UTF16_Invalid -> Bool
(UTF16_Invalid -> UTF16_Invalid -> Bool)
-> (UTF16_Invalid -> UTF16_Invalid -> Bool) -> Eq UTF16_Invalid
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
== :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c== :: UTF16_Invalid -> UTF16_Invalid -> Bool
Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
type Unit UTF16 = Word16
type Error UTF16 = UTF16_Invalid
encodingNext :: UTF16
-> (Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
encodingNext UTF16
_ = (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
(Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
next
encodingWrite :: UTF16
-> Char
-> Builder
(UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
encodingWrite UTF16
_ = Char
-> Builder
(UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write
next :: (Offset Word16 -> Word16)
-> Offset Word16
-> Either UTF16_Invalid (Char, Offset Word16)
next :: (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next Offset Word16 -> Word16
getter Offset Word16
off
| Word16
h Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xd800 = (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
hh, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1)
| Word16
h Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xe000 = (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
hh, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1)
| Bool
otherwise = Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
where
h :: Word16
!h :: Word16
h@(W16# Word#
hh) = Offset Word16 -> Word16
getter Offset Word16
off
toChar :: Word# -> Char
toChar :: Word# -> Char
toChar Word#
w = Char# -> Char
C# (Int# -> Char#
chr# (Word# -> Int#
word2Int# Word#
w))
to32 :: Word16 -> Word32
to32 :: Word16 -> Word32
to32 (W16# Word#
w) = Word# -> Word32
W32# Word#
w
nextContinuation :: Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
| Word16
cont Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xdc00 Bool -> Bool -> Bool
&& Word16
cont Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xe00 =
let !(W32# Word#
w) = ((Word16 -> Word32
to32 Word16
h Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
10)
Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word16 -> Word32
to32 Word16
cont Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff)
in (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
w, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
2)
| Bool
otherwise = UTF16_Invalid -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. a -> Either a b
Left UTF16_Invalid
InvalidContinuation
where
cont :: Word16
!cont :: Word16
cont = Offset Word16 -> Word16
getter (Offset Word16 -> Word16) -> Offset Word16 -> Word16
forall a b. (a -> b) -> a -> b
$ Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1
write :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write :: Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0xd800 = Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x10000 = let (Word16
w1, Word16
w2) = Char -> (Word16, Word16)
wHigh Char
c in Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w1 Builder (UArray Word16) (MUArray Word16) Word16 st err ()
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w2
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x10ffff = UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a e. Exception e => e -> a
throw (UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0xe000 = Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
| Bool
otherwise = UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a e. Exception e => e -> a
throw (UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
where
w16 :: Char -> Word16
w16 :: Char -> Word16
w16 (C# Char#
ch) = Word# -> Word16
W16# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch))
to16 :: Word32 -> Word16
to16 :: Word32 -> Word16
to16 = Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
Prelude.fromIntegral
wHigh :: Char -> (Word16, Word16)
wHigh :: Char -> (Word16, Word16)
wHigh (C# Char#
ch) =
let v :: Word32
v = Word# -> Word32
W32# (Word# -> Word# -> Word#
minusWord# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch)) Word#
0x10000##)
in (Word16
0xdc00 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word32 -> Word16
to16 (Word32
v Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
10), Word16
0xd800 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word32 -> Word16
to16 (Word32
v Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff))