-- |
-- Module      : Basement.String.Encoding.UTF16
-- License     : BSD-style
-- Maintainer  : Foundation
-- Stability   : experimental
-- Portability : portable
--
{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
    ( UTF16(..)
    , UTF16_Invalid(..)
    ) where

import GHC.Prim
import GHC.Word
import GHC.Types
import qualified Prelude
import Basement.Compat.Base
import Basement.Compat.Primitive
import Basement.IntegralConv
import Basement.Bits
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.Numerical.Subtractive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder

import Basement.String.Encoding.Encoding

data UTF16_Invalid
    = InvalidContinuation
    | InvalidUnicode Char
  deriving (Int -> UTF16_Invalid -> ShowS
[UTF16_Invalid] -> ShowS
UTF16_Invalid -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF16_Invalid] -> ShowS
$cshowList :: [UTF16_Invalid] -> ShowS
show :: UTF16_Invalid -> String
$cshow :: UTF16_Invalid -> String
showsPrec :: Int -> UTF16_Invalid -> ShowS
$cshowsPrec :: Int -> UTF16_Invalid -> ShowS
Show, UTF16_Invalid -> UTF16_Invalid -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
== :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c== :: UTF16_Invalid -> UTF16_Invalid -> Bool
Eq, Typeable)
instance Exception UTF16_Invalid

data UTF16 = UTF16

instance Encoding UTF16 where
    type Unit UTF16 = Word16
    type Error UTF16 = UTF16_Invalid
    encodingNext :: UTF16
-> (Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
encodingNext  UTF16
_ = (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next
    encodingWrite :: forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
UTF16
-> Char
-> Builder
     (UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
encodingWrite UTF16
_ = forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write


--
-- U+0000 to U+D7FF and U+E000 to U+FFFF : 1 bytes
-- U+10000 to U+10FFFF :
--    * 0x010000 is subtracted from the code point, leaving a 20-bit number in the range 0..0x0FFFFF.
--    * The top ten bits (a number in the range 0..0x03FF) are added to 0xD800 to give the first 16-bit code unit
--      or high surrogate, which will be in the range 0xD800..0xDBFF.
--    * The low ten bits (also in the range 0..0x03FF) are added to 0xDC00 to give the second 16-bit code unit
--      or low surrogate, which will be in the range 0xDC00..0xDFFF.

next :: (Offset Word16 -> Word16)
     -> Offset Word16
     -> Either UTF16_Invalid (Char, Offset Word16)
next :: (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next Offset Word16 -> Word16
getter Offset Word16
off
    | Word16
h forall a. Ord a => a -> a -> Bool
<  Word16
0xd800 = forall a b. b -> Either a b
Right (Word16 -> Char
toChar16 Word16
h, Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
1)
    | Word16
h forall a. Ord a => a -> a -> Bool
>= Word16
0xe000 = forall a b. b -> Either a b
Right (Word16 -> Char
toChar16 Word16
h, Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
1)
    | Bool
otherwise   = Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
  where
    h :: Word16
    !h :: Word16
h = Offset Word16 -> Word16
getter Offset Word16
off

    to32 :: Word16 -> Word32
    to32 :: Word16 -> Word32
to32 (W16# Word16#
w) = Word32# -> Word32
W32# (Word16# -> Word32#
word16ToWord32# Word16#
w)

    toChar16 :: Word16 -> Char
    toChar16 :: Word16 -> Char
toChar16 (W16# Word16#
w) = Char# -> Char
C# (Word32# -> Char#
word32ToChar# (Word16# -> Word32#
word16ToWord32# Word16#
w))

    nextContinuation :: Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
        | Word16
cont forall a. Ord a => a -> a -> Bool
>= Word16
0xdc00 Bool -> Bool -> Bool
&& Word16
cont forall a. Ord a => a -> a -> Bool
< Word16
0xe00 =
            let !(W32# Word32#
w) = ((Word16 -> Word32
to32 Word16
h forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff) forall bits. BitOps bits => bits -> CountOf Bool -> bits
.<<. CountOf Bool
10) forall bits. BitOps bits => bits -> bits -> bits
.|. (Word16 -> Word32
to32 Word16
cont forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff)
             in forall a b. b -> Either a b
Right (Char# -> Char
C# (Word32# -> Char#
word32ToChar# Word32#
w), Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
2)
        | Bool
otherwise = forall a b. a -> Either a b
Left UTF16_Invalid
InvalidContinuation
      where
        cont :: Word16
        !cont :: Word16
cont = Offset Word16 -> Word16
getter forall a b. (a -> b) -> a -> b
$ Offset Word16
off forall a. Additive a => a -> a -> a
+ forall ty. Int -> Offset ty
Offset Int
1

write :: (PrimMonad st, Monad st)
      => Char
      -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write :: forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write Char
c
    | Char
c forall a. Ord a => a -> a -> Bool
< forall a. Enum a => Int -> a
toEnum Int
0xd800   = forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
    | Char
c forall a. Ord a => a -> a -> Bool
> forall a. Enum a => Int -> a
toEnum Int
0x10000  = let (Word16
w1, Word16
w2) = Char -> (Word16, Word16)
wHigh Char
c in forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w2
    | Char
c forall a. Ord a => a -> a -> Bool
> forall a. Enum a => Int -> a
toEnum Int
0x10ffff = forall a e. Exception e => e -> a
throw forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
    | Char
c forall a. Ord a => a -> a -> Bool
>= forall a. Enum a => Int -> a
toEnum Int
0xe000  = forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
    | Bool
otherwise = forall a e. Exception e => e -> a
throw forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
  where
    w16 :: Char -> Word16
    w16 :: Char -> Word16
w16 (C# Char#
ch) = Word16# -> Word16
W16# (Word# -> Word16#
wordToWord16# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch)))

    to16 :: Word32 -> Word16
    to16 :: Word32 -> Word16
to16 = forall a b. (Integral a, Num b) => a -> b
Prelude.fromIntegral

    wHigh :: Char -> (Word16, Word16)
    wHigh :: Char -> (Word16, Word16)
wHigh (C# Char#
ch) =
        let v :: Difference Word32
v = Word32# -> Word32
W32# (Char# -> Word32#
charToWord32# Char#
ch) forall a. Subtractive a => a -> a -> Difference a
- Word32
0x10000
         in (Word16
0xdc00 forall bits. BitOps bits => bits -> bits -> bits
.|. Word32 -> Word16
to16 (Difference Word32
v forall bits. BitOps bits => bits -> CountOf Bool -> bits
.>>. CountOf Bool
10), Word16
0xd800 forall bits. BitOps bits => bits -> bits -> bits
.|. Word32 -> Word16
to16 (Difference Word32
v forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff))