module Foundation.String.Encoding.UTF16
( UTF16(..)
, UTF16_Invalid(..)
) where
import Foundation.Internal.Base
import Foundation.Primitive.Types.OffsetSize
import Foundation.Primitive.Monad
import GHC.Prim
import GHC.Word
import GHC.Types
import Foundation.Numerical
import Data.Bits
import qualified Prelude
import Foundation.Array.Unboxed
import Foundation.Array.Unboxed.Mutable (MUArray)
import Foundation.Boot.Builder
import Foundation.String.Encoding.Encoding
data UTF16_Invalid
= InvalidContinuation
| InvalidUnicode Char
deriving (Show, Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
type Unit UTF16 = Word16
type Error UTF16 = UTF16_Invalid
encodingNext _ = next
encodingWrite _ = write
next :: (Offset Word16 -> Word16)
-> Offset Word16
-> Either UTF16_Invalid (Char, Offset Word16)
next getter off
| h < 0xd800 = Right (toChar hh, off + Offset 1)
| h >= 0xe000 = Right (toChar hh, off + Offset 1)
| otherwise = nextContinuation
where
h :: Word16
!h@(W16# hh) = getter off
toChar :: Word# -> Char
toChar w = C# (chr# (word2Int# w))
to32 :: Word16 -> Word32
to32 (W16# w) = W32# w
nextContinuation
| cont >= 0xdc00 && cont < 0xe00 =
let !(W32# w) = ((to32 h .&. 0x3ff) `shiftL` 10)
.|. (to32 cont .&. 0x3ff)
in Right (toChar w, off + Offset 2)
| otherwise = Left InvalidContinuation
where
cont :: Word16
!cont = getter $ off + Offset 1
write :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word16) (MUArray Word16) Word16 st ()
write c
| c < toEnum 0xd800 = builderAppend $ w16 c
| c > toEnum 0x10000 = let (w1, w2) = wHigh c in builderAppend w1 >> builderAppend w2
| c > toEnum 0x10ffff = throw $ InvalidUnicode c
| c >= toEnum 0xe000 = builderAppend $ w16 c
| otherwise = throw $ InvalidUnicode c
where
w16 :: Char -> Word16
w16 (C# ch) = W16# (int2Word# (ord# ch))
to16 :: Word32 -> Word16
to16 = Prelude.fromIntegral
wHigh :: Char -> (Word16, Word16)
wHigh (C# ch) =
let v = W32# (minusWord# (int2Word# (ord# ch)) 0x10000##)
in (0xdc00 .|. to16 (v `shiftR` 10), 0xd800 .|. to16 (v .&. 0x3ff))