{-# LANGUAGE CPP #-}

module Binrep.Type.Text.Encoding.ShiftJis where

import Refined

import Data.Text ( Text )

#ifdef HAVE_ICU
import Data.Text.ICU.Convert qualified as ICU
import System.IO.Unsafe qualified
import Data.ByteString qualified as B

import Binrep.Type.Text.Internal
#endif

data ShiftJis

-- | TODO Unsafely assume all 'Text's are valid Shift-JIS.
instance Predicate ShiftJis Text where validate :: Proxy ShiftJis -> Text -> Maybe RefineException
validate Proxy ShiftJis
_ Text
_ = Maybe RefineException
success

#ifdef HAVE_ICU
instance Encode ShiftJis where encode' :: Text -> Bytes
encode' = String -> Text -> Bytes
encodeViaTextICU' String
"Shift-JIS"
instance Decode ShiftJis where
    decode :: Bytes -> Either String (AsText ShiftJis)
decode  = (String -> String)
-> (Bytes -> Either String Text)
-> Bytes
-> Either String (AsText ShiftJis)
forall {k} (enc :: k) e.
(e -> String)
-> (Bytes -> Either e Text) -> Bytes -> Either String (AsText enc)
decodeText String -> String
forall a. a -> a
id ((Bytes -> Either String Text)
 -> Bytes -> Either String (AsText ShiftJis))
-> (Bytes -> Either String Text)
-> Bytes
-> Either String (AsText ShiftJis)
forall a b. (a -> b) -> a -> b
$ String -> Bytes -> Either String Text
decodeViaTextICU' String
"Shift-JIS"

-- | Encode some 'Text' to the given character set using text-icu.
--
-- No guarantees about correctness. Encodings are weird. e.g. Shift JIS's
-- yen/backslash problem is apparently to do with OSs treating it differently.
--
-- Expects a 'Text' that is confirmed valid for converting to the character set.
--
-- The charset must be valid, or it's exception time. See text-icu.
encodeViaTextICU :: String -> Text -> IO B.ByteString
encodeViaTextICU :: String -> Text -> IO Bytes
encodeViaTextICU String
charset Text
t = do
    Converter
conv <- String -> Maybe Bool -> IO Converter
ICU.open String
charset Maybe Bool
forall a. Maybe a
Nothing
    Bytes -> IO Bytes
forall a. a -> IO a
forall (f :: Type -> Type) a. Applicative f => a -> f a
pure (Bytes -> IO Bytes) -> Bytes -> IO Bytes
forall a b. (a -> b) -> a -> b
$ Converter -> Text -> Bytes
ICU.fromUnicode Converter
conv Text
t

encodeViaTextICU' :: String -> Text -> B.ByteString
encodeViaTextICU' :: String -> Text -> Bytes
encodeViaTextICU' String
charset Text
t =
    IO Bytes -> Bytes
forall a. IO a -> a
System.IO.Unsafe.unsafeDupablePerformIO (IO Bytes -> Bytes) -> IO Bytes -> Bytes
forall a b. (a -> b) -> a -> b
$ String -> Text -> IO Bytes
encodeViaTextICU String
charset Text
t

-- TODO Shitty library doesn't let us say how to handle errors. Apparently, the
-- only solution is to scan through the resulting 'Text' to look for @\SUB@
-- characters, or lie about correctness. Sigh.
decodeViaTextICU :: String -> B.ByteString -> IO (Either String Text)
decodeViaTextICU :: String -> Bytes -> IO (Either String Text)
decodeViaTextICU String
charset Bytes
t = do
    Converter
conv <- String -> Maybe Bool -> IO Converter
ICU.open String
charset Maybe Bool
forall a. Maybe a
Nothing
    Either String Text -> IO (Either String Text)
forall a. a -> IO a
forall (f :: Type -> Type) a. Applicative f => a -> f a
pure (Either String Text -> IO (Either String Text))
-> Either String Text -> IO (Either String Text)
forall a b. (a -> b) -> a -> b
$ Text -> Either String Text
forall a b. b -> Either a b
Right (Text -> Either String Text) -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Converter -> Bytes -> Text
ICU.toUnicode Converter
conv Bytes
t

decodeViaTextICU' :: String -> B.ByteString -> Either String Text
decodeViaTextICU' :: String -> Bytes -> Either String Text
decodeViaTextICU' String
charset Bytes
t = do
    IO (Either String Text) -> Either String Text
forall a. IO a -> a
System.IO.Unsafe.unsafeDupablePerformIO (IO (Either String Text) -> Either String Text)
-> IO (Either String Text) -> Either String Text
forall a b. (a -> b) -> a -> b
$ String -> Bytes -> IO (Either String Text)
decodeViaTextICU String
charset Bytes
t
#endif