{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.UTF8
   Copyright   : Copyright (C) 2010-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
-}
module Text.Pandoc.UTF8 ( readFile
                        , getContents
                        , writeFileWith
                        , writeFile
                        , putStrWith
                        , putStr
                        , putStrLnWith
                        , putStrLn
                        , hPutStrWith
                        , hPutStr
                        , hPutStrLnWith
                        , hPutStrLn
                        , hGetContents
                        , toString
                        , toText
                        , fromString
                        , fromText
                        , toStringLazy
                        , fromTextLazy
                        , toTextLazy
                        , fromStringLazy
                        , encodePath
                        , decodeArg
                        )

where

import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
                  putStrLn, readFile, writeFile)

readFile :: FilePath -> IO Text
readFile :: FilePath -> IO Text
readFile FilePath
f = do
  Handle
h <- FilePath -> IOMode -> IO Handle
openFile (FilePath -> FilePath
encodePath FilePath
f) IOMode
ReadMode
  Handle -> IO Text
hGetContents Handle
h

getContents :: IO Text
getContents :: IO Text
getContents = Handle -> IO Text
hGetContents Handle
stdin

writeFileWith :: Newline -> FilePath -> Text -> IO ()
writeFileWith :: Newline -> FilePath -> Text -> IO ()
writeFileWith Newline
eol FilePath
f Text
s =
  FilePath -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. FilePath -> IOMode -> (Handle -> IO r) -> IO r
withFile (FilePath -> FilePath
encodePath FilePath
f) IOMode
WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
h -> Newline -> Handle -> Text -> IO ()
hPutStrWith Newline
eol Handle
h Text
s

writeFile :: FilePath -> Text -> IO ()
writeFile :: FilePath -> Text -> IO ()
writeFile = Newline -> FilePath -> Text -> IO ()
writeFileWith Newline
nativeNewline

putStrWith :: Newline -> Text -> IO ()
putStrWith :: Newline -> Text -> IO ()
putStrWith Newline
eol Text
s = Newline -> Handle -> Text -> IO ()
hPutStrWith Newline
eol Handle
stdout Text
s

putStr :: Text -> IO ()
putStr :: Text -> IO ()
putStr = Newline -> Text -> IO ()
putStrWith Newline
nativeNewline

putStrLnWith :: Newline -> Text -> IO ()
putStrLnWith :: Newline -> Text -> IO ()
putStrLnWith Newline
eol Text
s = Newline -> Handle -> Text -> IO ()
hPutStrLnWith Newline
eol Handle
stdout Text
s

putStrLn :: Text -> IO ()
putStrLn :: Text -> IO ()
putStrLn = Newline -> Text -> IO ()
putStrLnWith Newline
nativeNewline

hPutStrWith :: Newline -> Handle -> Text -> IO ()
hPutStrWith :: Newline -> Handle -> Text -> IO ()
hPutStrWith Newline
eol Handle
h Text
s =
  Handle -> NewlineMode -> IO ()
hSetNewlineMode Handle
h (Newline -> Newline -> NewlineMode
NewlineMode Newline
eol Newline
eol) IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
  Handle -> TextEncoding -> IO ()
hSetEncoding Handle
h TextEncoding
utf8 IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Handle -> Text -> IO ()
TIO.hPutStr Handle
h Text
s

hPutStr :: Handle -> Text -> IO ()
hPutStr :: Handle -> Text -> IO ()
hPutStr = Newline -> Handle -> Text -> IO ()
hPutStrWith Newline
nativeNewline

hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
hPutStrLnWith Newline
eol Handle
h Text
s =
  Handle -> NewlineMode -> IO ()
hSetNewlineMode Handle
h (Newline -> Newline -> NewlineMode
NewlineMode Newline
eol Newline
eol) IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
  Handle -> TextEncoding -> IO ()
hSetEncoding Handle
h TextEncoding
utf8 IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Handle -> Text -> IO ()
TIO.hPutStrLn Handle
h Text
s

hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn :: Handle -> Text -> IO ()
hPutStrLn = Newline -> Handle -> Text -> IO ()
hPutStrLnWith Newline
nativeNewline

hGetContents :: Handle -> IO Text
hGetContents :: Handle -> IO Text
hGetContents = (ByteString -> Text) -> IO ByteString -> IO Text
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> Text
toText (IO ByteString -> IO Text)
-> (Handle -> IO ByteString) -> Handle -> IO Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> IO ByteString
B.hGetContents

-- | Convert UTF8-encoded ByteString to Text, also
-- removing '\\r' characters.
toText :: B.ByteString -> Text
toText :: ByteString -> Text
toText = ByteString -> Text
T.decodeUtf8 (ByteString -> Text)
-> (ByteString -> ByteString) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
filterCRs (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
dropBOM
  where dropBOM :: ByteString -> ByteString
dropBOM ByteString
bs =
         if ByteString
"\xEF\xBB\xBF" ByteString -> ByteString -> Bool
`B.isPrefixOf` ByteString
bs
            then Int -> ByteString -> ByteString
B.drop Int
3 ByteString
bs
            else ByteString
bs
        filterCRs :: ByteString -> ByteString
filterCRs = (Char -> Bool) -> ByteString -> ByteString
B.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r')

-- | Convert UTF8-encoded ByteString to String, also
-- removing '\\r' characters.
toString :: B.ByteString -> String
toString :: ByteString -> FilePath
toString = Text -> FilePath
T.unpack (Text -> FilePath)
-> (ByteString -> Text) -> ByteString -> FilePath
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
toText

-- | Convert UTF8-encoded ByteString to Text, also
-- removing '\\r' characters.
toTextLazy :: BL.ByteString -> TL.Text
toTextLazy :: ByteString -> Text
toTextLazy = ByteString -> Text
TL.decodeUtf8 (ByteString -> Text)
-> (ByteString -> ByteString) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
filterCRs (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
dropBOM
  where dropBOM :: ByteString -> ByteString
dropBOM ByteString
bs =
         if ByteString
"\xEF\xBB\xBF" ByteString -> ByteString -> Bool
`BL.isPrefixOf` ByteString
bs
            then Int64 -> ByteString -> ByteString
BL.drop Int64
3 ByteString
bs
            else ByteString
bs
        filterCRs :: ByteString -> ByteString
filterCRs = (Char -> Bool) -> ByteString -> ByteString
BL.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r')

-- | Convert UTF8-encoded ByteString to String, also
-- removing '\\r' characters.
toStringLazy :: BL.ByteString -> String
toStringLazy :: ByteString -> FilePath
toStringLazy = Text -> FilePath
TL.unpack (Text -> FilePath)
-> (ByteString -> Text) -> ByteString -> FilePath
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
toTextLazy

fromText :: Text -> B.ByteString
fromText :: Text -> ByteString
fromText = Text -> ByteString
T.encodeUtf8

fromTextLazy :: TL.Text -> BL.ByteString
fromTextLazy :: Text -> ByteString
fromTextLazy = Text -> ByteString
TL.encodeUtf8

fromString :: String -> B.ByteString
fromString :: FilePath -> ByteString
fromString = Text -> ByteString
fromText (Text -> ByteString)
-> (FilePath -> Text) -> FilePath -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> Text
T.pack

fromStringLazy :: String -> BL.ByteString
fromStringLazy :: FilePath -> ByteString
fromStringLazy = Text -> ByteString
fromTextLazy (Text -> ByteString)
-> (FilePath -> Text) -> FilePath -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> Text
TL.pack

encodePath :: FilePath -> FilePath
encodePath :: FilePath -> FilePath
encodePath = FilePath -> FilePath
forall a. a -> a
id

{-# DEPRECATED decodeArg "decodeArg is now a no-op" #-}
decodeArg :: String -> String
decodeArg :: FilePath -> FilePath
decodeArg = FilePath -> FilePath
forall a. a -> a
id