-- |
-- Module      : Basement.String.Encoding.ASCII7
-- License     : BSD-style
-- Maintainer  : Foundation
-- Stability   : experimental
-- Portability : portable
--

{-# LANGUAGE MagicHash #-}

module Basement.String.Encoding.ASCII7
    ( ASCII7(..)
    , ASCII7_Invalid(..)
    ) where

import Basement.Compat.Base
import Basement.Types.OffsetSize
import Basement.Numerical.Additive
import Basement.Monad

import GHC.Prim
import GHC.Word
import GHC.Types
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder

import Basement.String.Encoding.Encoding

-- | validate a given byte is within ASCII characters encoring size
--
-- This function check the 8th bit is set to 0
--
isAscii :: Word8 -> Bool
isAscii :: Word8 -> Bool
isAscii (W8# Word#
w) = Word# -> Word8
W8# (Word# -> Word# -> Word#
and# Word#
w Word#
0x80## ) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0
{-# INLINE isAscii #-}

data ASCII7_Invalid
    = ByteOutOfBound Word8
    | CharNotAscii   Char
  deriving (Typeable, Int -> ASCII7_Invalid -> ShowS
[ASCII7_Invalid] -> ShowS
ASCII7_Invalid -> String
(Int -> ASCII7_Invalid -> ShowS)
-> (ASCII7_Invalid -> String)
-> ([ASCII7_Invalid] -> ShowS)
-> Show ASCII7_Invalid
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ASCII7_Invalid] -> ShowS
$cshowList :: [ASCII7_Invalid] -> ShowS
show :: ASCII7_Invalid -> String
$cshow :: ASCII7_Invalid -> String
showsPrec :: Int -> ASCII7_Invalid -> ShowS
$cshowsPrec :: Int -> ASCII7_Invalid -> ShowS
Show, ASCII7_Invalid -> ASCII7_Invalid -> Bool
(ASCII7_Invalid -> ASCII7_Invalid -> Bool)
-> (ASCII7_Invalid -> ASCII7_Invalid -> Bool) -> Eq ASCII7_Invalid
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ASCII7_Invalid -> ASCII7_Invalid -> Bool
$c/= :: ASCII7_Invalid -> ASCII7_Invalid -> Bool
== :: ASCII7_Invalid -> ASCII7_Invalid -> Bool
$c== :: ASCII7_Invalid -> ASCII7_Invalid -> Bool
Eq)
instance Exception ASCII7_Invalid

data ASCII7 = ASCII7

instance Encoding ASCII7 where
    type Unit ASCII7 = Word8
    type Error ASCII7 = ASCII7_Invalid
    encodingNext :: ASCII7
-> (Offset (Unit ASCII7) -> Unit ASCII7)
-> Offset (Unit ASCII7)
-> Either (Error ASCII7) (Char, Offset (Unit ASCII7))
encodingNext  ASCII7
_ = (Offset Word8 -> Word8)
-> Offset Word8 -> Either ASCII7_Invalid (Char, Offset Word8)
(Offset (Unit ASCII7) -> Unit ASCII7)
-> Offset (Unit ASCII7)
-> Either (Error ASCII7) (Char, Offset (Unit ASCII7))
next
    encodingWrite :: ASCII7
-> Char
-> Builder
     (UArray (Unit ASCII7))
     (MUArray (Unit ASCII7))
     (Unit ASCII7)
     st
     err
     ()
encodingWrite ASCII7
_ = Char
-> Builder
     (UArray (Unit ASCII7))
     (MUArray (Unit ASCII7))
     (Unit ASCII7)
     st
     err
     ()
forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
write

-- | consume an Ascii7 char and return the Unicode point and the position
-- of the next possible Ascii7 char
--
next :: (Offset Word8 -> Word8)
          -- ^ method to access a given byte
     -> Offset Word8
          -- ^ index of the byte
     -> Either ASCII7_Invalid (Char, Offset Word8)
          -- ^ either successfully validated the ASCII char and returned the
          -- next index or fail with an error
next :: (Offset Word8 -> Word8)
-> Offset Word8 -> Either ASCII7_Invalid (Char, Offset Word8)
next Offset Word8 -> Word8
getter Offset Word8
off
    | Word8 -> Bool
isAscii Word8
w8 = (Char, Offset Word8) -> Either ASCII7_Invalid (Char, Offset Word8)
forall a b. b -> Either a b
Right (Word# -> Char
toChar Word#
w, Offset Word8
off Offset Word8 -> Offset Word8 -> Offset Word8
forall a. Additive a => a -> a -> a
+ Offset Word8
1)
    | Bool
otherwise  = ASCII7_Invalid -> Either ASCII7_Invalid (Char, Offset Word8)
forall a b. a -> Either a b
Left (ASCII7_Invalid -> Either ASCII7_Invalid (Char, Offset Word8))
-> ASCII7_Invalid -> Either ASCII7_Invalid (Char, Offset Word8)
forall a b. (a -> b) -> a -> b
$ Word8 -> ASCII7_Invalid
ByteOutOfBound Word8
w8
  where
    !w8 :: Word8
w8@(W8# Word#
w) = Offset Word8 -> Word8
getter Offset Word8
off
    toChar :: Word# -> Char
    toChar :: Word# -> Char
toChar Word#
a = Char# -> Char
C# (Int# -> Char#
chr# (Word# -> Int#
word2Int# Word#
a))

-- Write ascii char
--
-- > build 64 $ sequence_ write "this is a simple list of char..."
--
write :: (PrimMonad st, Monad st)
      => Char
           -- ^ expecting it to be a valid Ascii character.
           -- otherwise this function will throw an exception
      -> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
write :: Char -> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
write Char
c
    | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x80 = Word8 -> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word8 -> Builder (UArray Word8) (MUArray Word8) Word8 st err ())
-> Word8 -> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word8
w8 Char
c
    | Bool
otherwise       = ASCII7_Invalid
-> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
forall a e. Exception e => e -> a
throw (ASCII7_Invalid
 -> Builder (UArray Word8) (MUArray Word8) Word8 st err ())
-> ASCII7_Invalid
-> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> ASCII7_Invalid
CharNotAscii Char
c
  where
    w8 :: Char -> Word8
    w8 :: Char -> Word8
w8 (C# Char#
ch) = Word# -> Word8
W8# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch))