-- |
-- Module      : Basement.String.Encoding.UTF16
-- License     : BSD-style
-- Maintainer  : Foundation
-- Stability   : experimental
-- Portability : portable
--
{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
    ( UTF16(..)
    , UTF16_Invalid(..)
    ) where

import GHC.Prim
import GHC.Word
import GHC.Types
import Data.Bits
import qualified Prelude
import Basement.Compat.Base
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder

import Basement.String.Encoding.Encoding

data UTF16_Invalid
    = InvalidContinuation
    | InvalidUnicode Char
  deriving (Int -> UTF16_Invalid -> ShowS
[UTF16_Invalid] -> ShowS
UTF16_Invalid -> String
(Int -> UTF16_Invalid -> ShowS)
-> (UTF16_Invalid -> String)
-> ([UTF16_Invalid] -> ShowS)
-> Show UTF16_Invalid
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF16_Invalid] -> ShowS
$cshowList :: [UTF16_Invalid] -> ShowS
show :: UTF16_Invalid -> String
$cshow :: UTF16_Invalid -> String
showsPrec :: Int -> UTF16_Invalid -> ShowS
$cshowsPrec :: Int -> UTF16_Invalid -> ShowS
Show, UTF16_Invalid -> UTF16_Invalid -> Bool
(UTF16_Invalid -> UTF16_Invalid -> Bool)
-> (UTF16_Invalid -> UTF16_Invalid -> Bool) -> Eq UTF16_Invalid
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
== :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c== :: UTF16_Invalid -> UTF16_Invalid -> Bool
Eq, Typeable)
instance Exception UTF16_Invalid

data UTF16 = UTF16

instance Encoding UTF16 where
    type Unit UTF16 = Word16
    type Error UTF16 = UTF16_Invalid
    encodingNext :: UTF16
-> (Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
encodingNext  UTF16
_ = (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
(Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
next
    encodingWrite :: UTF16
-> Char
-> Builder
     (UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
encodingWrite UTF16
_ = Char
-> Builder
     (UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write


--
-- U+0000 to U+D7FF and U+E000 to U+FFFF : 1 bytes
-- U+10000 to U+10FFFF :
--    * 0x010000 is subtracted from the code point, leaving a 20-bit number in the range 0..0x0FFFFF.
--    * The top ten bits (a number in the range 0..0x03FF) are added to 0xD800 to give the first 16-bit code unit
--      or high surrogate, which will be in the range 0xD800..0xDBFF.
--    * The low ten bits (also in the range 0..0x03FF) are added to 0xDC00 to give the second 16-bit code unit
--      or low surrogate, which will be in the range 0xDC00..0xDFFF.

next :: (Offset Word16 -> Word16)
     -> Offset Word16
     -> Either UTF16_Invalid (Char, Offset Word16)
next :: (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next Offset Word16 -> Word16
getter Offset Word16
off
    | Word16
h Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<  Word16
0xd800 = (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
hh, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1)
    | Word16
h Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xe000 = (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
hh, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1)
    | Bool
otherwise   = Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
  where
    h :: Word16
    !h :: Word16
h@(W16# Word#
hh) = Offset Word16 -> Word16
getter Offset Word16
off
    toChar :: Word# -> Char
    toChar :: Word# -> Char
toChar Word#
w = Char# -> Char
C# (Int# -> Char#
chr# (Word# -> Int#
word2Int# Word#
w))
    to32 :: Word16 -> Word32
    to32 :: Word16 -> Word32
to32 (W16# Word#
w) = Word# -> Word32
W32# Word#
w

    nextContinuation :: Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
        | Word16
cont Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xdc00 Bool -> Bool -> Bool
&& Word16
cont Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xe00 =
            let !(W32# Word#
w) = ((Word16 -> Word32
to32 Word16
h Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
10)
                         Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word16 -> Word32
to32 Word16
cont Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff)
             in (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
w, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
2)
        | Bool
otherwise = UTF16_Invalid -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. a -> Either a b
Left UTF16_Invalid
InvalidContinuation
      where
        cont :: Word16
        !cont :: Word16
cont = Offset Word16 -> Word16
getter (Offset Word16 -> Word16) -> Offset Word16 -> Word16
forall a b. (a -> b) -> a -> b
$ Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1

write :: (PrimMonad st, Monad st)
      => Char
      -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write :: Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write Char
c
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0xd800   = Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word16
 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x10000  = let (Word16
w1, Word16
w2) = Char -> (Word16, Word16)
wHigh Char
c in Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w1 Builder (UArray Word16) (MUArray Word16) Word16 st err ()
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w2
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x10ffff = UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a e. Exception e => e -> a
throw (UTF16_Invalid
 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0xe000  = Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word16
 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
    | Bool
otherwise = UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a e. Exception e => e -> a
throw (UTF16_Invalid
 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
  where
    w16 :: Char -> Word16
    w16 :: Char -> Word16
w16 (C# Char#
ch) = Word# -> Word16
W16# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch))

    to16 :: Word32 -> Word16
    to16 :: Word32 -> Word16
to16 = Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
Prelude.fromIntegral

    wHigh :: Char -> (Word16, Word16)
    wHigh :: Char -> (Word16, Word16)
wHigh (C# Char#
ch) =
        let v :: Word32
v = Word# -> Word32
W32# (Word# -> Word# -> Word#
minusWord# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch)) Word#
0x10000##)
         in (Word16
0xdc00 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word32 -> Word16
to16 (Word32
v Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
10), Word16
0xd800 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word32 -> Word16
to16 (Word32
v Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3ff))