{-# LANGUAGE CPP #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE UnliftedFFITypes #-}

module System.OsString.Internal where

import System.OsString.Internal.Types

import Control.Monad.Catch
    ( MonadThrow )
import Data.ByteString
    ( ByteString )
import System.OsPath.Data.ByteString.Short
    ( fromShort )
import Data.Char
import Language.Haskell.TH
import Language.Haskell.TH.Quote
    ( QuasiQuoter (..) )
import Language.Haskell.TH.Syntax
    ( Lift (..), lift )
import System.IO
    ( TextEncoding )

import System.OsPath.Encoding ( encodeWith, EncodingException(..) )
import GHC.IO.Encoding.Failure ( CodingFailureMode(..) )
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
import GHC.IO.Encoding.UTF16 ( mkUTF16le )
import System.OsString.Windows
import qualified System.OsString.Windows as PF
#else
import GHC.IO.Encoding.UTF8 ( mkUTF8 )
import System.OsString.Posix
import qualified System.OsString.Posix as PF
#endif




-- | Partial unicode friendly encoding.
--
-- On windows this encodes as UTF16-LE (strictly), which is a pretty good guess.
-- On unix this encodes as UTF8 (strictly), which is a good guess.
--
-- Throws a 'EncodingException' if encoding fails.
toOsStringUtf :: MonadThrow m => String -> m OsString
toOsStringUtf :: String -> m OsString
toOsStringUtf = (PlatformString -> OsString) -> m PlatformString -> m OsString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap PlatformString -> OsString
OsString (m PlatformString -> m OsString)
-> (String -> m PlatformString) -> String -> m OsString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> m PlatformString
forall (m :: * -> *). MonadThrow m => String -> m PlatformString
toPlatformStringUtf

-- | Like 'toOsStringUtf', except allows to provide encodings.
toOsStringEnc :: TextEncoding  -- ^ unix text encoding
              -> TextEncoding  -- ^ windows text encoding
              -> String
              -> Either EncodingException OsString
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
toOsStringEnc _ winEnc str = OsString <$> toPlatformStringEnc winEnc str
#else
toOsStringEnc :: TextEncoding
-> TextEncoding -> String -> Either EncodingException OsString
toOsStringEnc TextEncoding
unixEnc TextEncoding
_ String
str = PlatformString -> OsString
OsString (PlatformString -> OsString)
-> Either EncodingException PlatformString
-> Either EncodingException OsString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TextEncoding -> String -> Either EncodingException PlatformString
toPlatformStringEnc TextEncoding
unixEnc String
str
#endif

-- | Like 'toOsStringUtf', except this mimics the behavior of the base library when doing filesystem
-- operations, which is:
--
-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale,
--    but PEP 383 only works properly on UTF-8 encodings, so good luck)
-- 2. on windows does permissive UTF-16 encoding, where coding errors generate
--    Chars in the surrogate range
--
-- Looking up the locale requires IO. If you're not worried about calls
-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure
-- to deeply evaluate the result to catch exceptions).
toOsStringFS :: String -> IO OsString
toOsStringFS :: String -> IO OsString
toOsStringFS = (PlatformString -> OsString) -> IO PlatformString -> IO OsString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap PlatformString -> OsString
OsString (IO PlatformString -> IO OsString)
-> (String -> IO PlatformString) -> String -> IO OsString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IO PlatformString
toPlatformStringFS


-- | Partial unicode friendly decoding.
--
-- On windows this decodes as UTF16-LE (strictly), which is a pretty good guess.
-- On unix this decodes as UTF8 (strictly), which is a good guess. Note that
-- filenames on unix are encoding agnostic char arrays.
--
-- Throws a 'EncodingException' if decoding fails.
fromOsStringUtf :: MonadThrow m => OsString -> m String
fromOsStringUtf :: OsString -> m String
fromOsStringUtf (OsString PlatformString
x) = PlatformString -> m String
forall (m :: * -> *). MonadThrow m => PlatformString -> m String
fromPlatformStringUtf PlatformString
x

-- | Like 'fromOsStringUtf', except allows to provide encodings.
--
-- The String is forced into memory to catch all exceptions.
fromOsStringEnc :: TextEncoding  -- ^ unix text encoding
                -> TextEncoding  -- ^ windows text encoding
                -> OsString
                -> Either EncodingException String
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
fromOsStringEnc _ winEnc (OsString x) = fromPlatformStringEnc winEnc x
#else
fromOsStringEnc :: TextEncoding
-> TextEncoding -> OsString -> Either EncodingException String
fromOsStringEnc TextEncoding
unixEnc TextEncoding
_ (OsString PlatformString
x) = TextEncoding -> PlatformString -> Either EncodingException String
fromPlatformStringEnc TextEncoding
unixEnc PlatformString
x
#endif


-- | Like 'fromOsStringUtf', except this mimics the behavior of the base library when doing filesystem
-- operations, which is:
--
-- 1. on unix, uses shady PEP 383 style encoding (based on the current locale,
--    but PEP 383 only works properly on UTF-8 encodings, so good luck)
-- 2. on windows does permissive UTF-16 encoding, where coding errors generate
--    Chars in the surrogate range
--
-- Looking up the locale requires IO. If you're not worried about calls
-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible (make sure
-- to deeply evaluate the result to catch exceptions).
fromOsStringFS :: OsString -> IO String
fromOsStringFS :: OsString -> IO String
fromOsStringFS (OsString PlatformString
x) = PlatformString -> IO String
fromPlatformStringFS PlatformString
x


-- | Constructs an @OsString@ from a ByteString.
--
-- On windows, this ensures valid UCS-2LE, on unix it is passed unchanged/unchecked.
--
-- Throws 'EncodingException' on invalid UCS-2LE on windows (although unlikely).
bytesToOsString :: MonadThrow m
                => ByteString
                -> m OsString
bytesToOsString :: ByteString -> m OsString
bytesToOsString = (PlatformString -> OsString) -> m PlatformString -> m OsString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap PlatformString -> OsString
OsString (m PlatformString -> m OsString)
-> (ByteString -> m PlatformString) -> ByteString -> m OsString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> m PlatformString
forall (m :: * -> *).
MonadThrow m =>
ByteString -> m PlatformString
bytesToPlatformString


qq :: (ByteString -> Q Exp) -> QuasiQuoter
qq :: (ByteString -> Q Exp) -> QuasiQuoter
qq ByteString -> Q Exp
quoteExp' =
  QuasiQuoter :: (String -> Q Exp)
-> (String -> Q Pat)
-> (String -> Q Type)
-> (String -> Q [Dec])
-> QuasiQuoter
QuasiQuoter
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
  { quoteExp  = quoteExp' . fromShort . either (error . show) id . encodeWith (mkUTF16le ErrorOnCodingFailure)
  , quotePat  = \_ ->
      fail "illegal QuasiQuote (allowed as expression only, used as a pattern)"
  , quoteType = \_ ->
      fail "illegal QuasiQuote (allowed as expression only, used as a type)"
  , quoteDec  = \_ ->
      fail "illegal QuasiQuote (allowed as expression only, used as a declaration)"
  }
#else
  { quoteExp :: String -> Q Exp
quoteExp  = ByteString -> Q Exp
quoteExp' (ByteString -> Q Exp) -> (String -> ByteString) -> String -> Q Exp
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> ByteString
fromShort (ShortByteString -> ByteString)
-> (String -> ShortByteString) -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (EncodingException -> ShortByteString)
-> (ShortByteString -> ShortByteString)
-> Either EncodingException ShortByteString
-> ShortByteString
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> ShortByteString
forall a. HasCallStack => String -> a
error (String -> ShortByteString)
-> (EncodingException -> String)
-> EncodingException
-> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. EncodingException -> String
forall a. Show a => a -> String
show) ShortByteString -> ShortByteString
forall a. a -> a
id (Either EncodingException ShortByteString -> ShortByteString)
-> (String -> Either EncodingException ShortByteString)
-> String
-> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextEncoding -> String -> Either EncodingException ShortByteString
encodeWith (CodingFailureMode -> TextEncoding
mkUTF8 CodingFailureMode
ErrorOnCodingFailure)
  , quotePat :: String -> Q Pat
quotePat  = \String
_ ->
      String -> Q Pat
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"illegal QuasiQuote (allowed as expression only, used as a pattern)"
  , quoteType :: String -> Q Type
quoteType = \String
_ ->
      String -> Q Type
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"illegal QuasiQuote (allowed as expression only, used as a type)"
  , quoteDec :: String -> Q [Dec]
quoteDec  = \String
_ ->
      String -> Q [Dec]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"illegal QuasiQuote (allowed as expression only, used as a declaration)"
  }
#endif

mkOsString :: ByteString -> Q Exp
mkOsString :: ByteString -> Q Exp
mkOsString ByteString
bs =
  case ByteString -> Maybe OsString
forall (m :: * -> *). MonadThrow m => ByteString -> m OsString
bytesToOsString ByteString
bs of
    Just OsString
afp -> OsString -> Q Exp
forall t. Lift t => t -> Q Exp
lift OsString
afp
    Maybe OsString
Nothing -> String -> Q Exp
forall a. HasCallStack => String -> a
error String
"invalid encoding"

-- | QuasiQuote an 'OsString'. This accepts Unicode characters
-- and encodes as UTF-8 on unix and UTF-16 on windows.
osstr :: QuasiQuoter
osstr :: QuasiQuoter
osstr = (ByteString -> Q Exp) -> QuasiQuoter
qq ByteString -> Q Exp
mkOsString


-- | Unpack an 'OsString' to a list of 'OsChar'.
unpackOsString :: OsString -> [OsChar]
unpackOsString :: OsString -> [OsChar]
unpackOsString (OsString PlatformString
x) = PlatformChar -> OsChar
OsChar (PlatformChar -> OsChar) -> [PlatformChar] -> [OsChar]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> PlatformString -> [PlatformChar]
unpackPlatformString PlatformString
x


-- | Pack a list of 'OsChar' to an 'OsString'
--
-- Note that using this in conjunction with 'unsafeFromChar' to
-- convert from @[Char]@ to 'OsString' is probably not what
-- you want, because it will truncate unicode code points.
packOsString :: [OsChar] -> OsString
packOsString :: [OsChar] -> OsString
packOsString = PlatformString -> OsString
OsString (PlatformString -> OsString)
-> ([OsChar] -> PlatformString) -> [OsChar] -> OsString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PlatformChar] -> PlatformString
packPlatformString ([PlatformChar] -> PlatformString)
-> ([OsChar] -> [PlatformChar]) -> [OsChar] -> PlatformString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (OsChar -> PlatformChar) -> [OsChar] -> [PlatformChar]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(OsChar PlatformChar
x) -> PlatformChar
x)


-- | Truncates on unix to 1 and on Windows to 2 octets.
unsafeFromChar :: Char -> OsChar
unsafeFromChar :: Char -> OsChar
unsafeFromChar = PlatformChar -> OsChar
OsChar (PlatformChar -> OsChar)
-> (Char -> PlatformChar) -> Char -> OsChar
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> PlatformChar
PF.unsafeFromChar

-- | Converts back to a unicode codepoint (total).
toChar :: OsChar -> Char
#if defined(mingw32_HOST_OS) || defined(__MINGW32__)
toChar (OsChar (WindowsChar w)) = chr $ fromIntegral w
#else
toChar :: OsChar -> Char
toChar (OsChar (PosixChar Word8
w)) = Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w
#endif