-- |
-- Module      : Streamly.Internal.Unicode.Char.Parser
-- Copyright   : (c) 2021 Composewell Technologies
-- License     : BSD-3-Clause
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
-- Portability : GHC
--
-- To parse a text input, use the decode routines from
-- "Streamly.Unicode.Stream" module to convert an input byte stream to a
-- Unicode Char stream and then use these parsers on the Char stream.

-- XXX Add explicit export list.
module Streamly.Internal.Unicode.Char.Parser
    ( alpha
    , alphaNum
    , ascii
    , asciiLower
    , asciiUpper
    , char
    , decimal
    , digit
    , double
    , hexadecimal
    , hexDigit
    , latin1
    , letter
    , lower
    , mark
    , number
    , octDigit
    , print
    , punctuation
    , separator
    , signed
    , space
    , symbol
    , upper
    )
where

import Control.Applicative (Alternative(..))
import Control.Monad.Catch (MonadCatch)
import Data.Bits (Bits, (.|.), shiftL)
import Data.Char (ord)
import Prelude hiding (print)
import Streamly.Internal.Data.Parser (Parser)

import qualified Data.Char as Char
import qualified Streamly.Internal.Data.Parser as Parser
import qualified Streamly.Internal.Data.Fold as Fold

--------------------------------------------------------------------------------
-- Character classification
--------------------------------------------------------------------------------

-- XXX It may be possible to implement faster predicates for ASCII byte stream.
-- We can measure if there is a signficant difference and if so we can add such
-- predicates to Streamly.Unicode.Char.Parser.Latin1.
--
#define CHAR_PARSER_SIG(NAME)         NAME :: MonadCatch m => Parser m Char Char
-- XXX Need to use the predicates from Unicode.Char module/unicode-data package
#define CHAR_PARSER(NAME, PREDICATE)  NAME = Parser.satisfy Char.PREDICATE

-- XXX Add haddock documentation
-- XXX Add INLINE pragmas

CHAR_PARSER_SIG(space)
CHAR_PARSER(space,isSpace)

CHAR_PARSER_SIG(lower)
CHAR_PARSER(lower,isLower)

CHAR_PARSER_SIG(upper)
CHAR_PARSER(upper,isUpper)

CHAR_PARSER_SIG(alpha)
CHAR_PARSER(alpha,isAlpha)

CHAR_PARSER_SIG(alphaNum)
CHAR_PARSER(alphaNum,isAlphaNum)

CHAR_PARSER_SIG(print)
CHAR_PARSER(print,isPrint)

CHAR_PARSER_SIG(digit)
CHAR_PARSER(digit,isDigit)

CHAR_PARSER_SIG(octDigit)
CHAR_PARSER(octDigit,isOctDigit)

CHAR_PARSER_SIG(hexDigit)
CHAR_PARSER(hexDigit,isHexDigit)

CHAR_PARSER_SIG(letter)
CHAR_PARSER(letter,isLetter)

CHAR_PARSER_SIG(mark)
CHAR_PARSER(mark,isMark)

CHAR_PARSER_SIG(number)
CHAR_PARSER(number,isNumber)

CHAR_PARSER_SIG(punctuation)
punctuation :: Parser m Char Char
CHAR_PARSER(punctuation,isPunctuation)

CHAR_PARSER_SIG(symbol)
CHAR_PARSER(symbol,isSymbol)

CHAR_PARSER_SIG(separator)
CHAR_PARSER(separator,isSeparator)

CHAR_PARSER_SIG(ascii)
CHAR_PARSER(ascii,isAscii)

CHAR_PARSER_SIG(latin1)
CHAR_PARSER(latin1,isLatin1)

CHAR_PARSER_SIG(asciiUpper)
CHAR_PARSER(asciiUpper,isAsciiUpper)

CHAR_PARSER_SIG(asciiLower)
CHAR_PARSER(asciiLower,isAsciiLower)

--------------------------------------------------------------------------------
-- Character parsers
--------------------------------------------------------------------------------

-- | Match a specific character.
{-# INLINE char #-}
char :: MonadCatch m => Char -> Parser m Char Char
char :: Char -> Parser m Char Char
char Char
c = (Char -> Bool) -> Parser m Char Char
forall (m :: * -> *) a. MonadCatch m => (a -> Bool) -> Parser m a a
Parser.satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)

--------------------------------------------------------------------------------
-- Numeric parsers
--------------------------------------------------------------------------------

-- XXX It should fail if the number is larger than the size of the type.
--
-- | Parse and decode an unsigned integral decimal number.
{-# INLINE decimal #-}
decimal :: (MonadCatch m, Integral a) => Parser m Char a
decimal :: Parser m Char a
decimal = (Char -> Bool) -> Fold m Char a -> Parser m Char a
forall (m :: * -> *) a b.
MonadCatch m =>
(a -> Bool) -> Fold m a b -> Parser m a b
Parser.takeWhile1 Char -> Bool
Char.isDigit ((a -> Char -> a) -> a -> Fold m Char a
forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' a -> Char -> a
forall a. Num a => a -> Char -> a
step a
0)

    where

    step :: a -> Char -> a
step a
a Char
c = a
a a -> a -> a
forall a. Num a => a -> a -> a
* a
10 a -> a -> a
forall a. Num a => a -> a -> a
+ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48)

-- | Parse and decode an unsigned integral hexadecimal number.  The hex digits
-- @\'a\'@ through @\'f\'@ may be upper or lower case.
--
-- Note: This parser does not accept a leading @\"0x\"@ string.
{-# INLINE hexadecimal #-}
hexadecimal :: (MonadCatch m, Integral a, Bits a) => Parser m Char a
hexadecimal :: Parser m Char a
hexadecimal = (Char -> Bool) -> Fold m Char a -> Parser m Char a
forall (m :: * -> *) a b.
MonadCatch m =>
(a -> Bool) -> Fold m a b -> Parser m a b
Parser.takeWhile1 Char -> Bool
isHexDigit ((a -> Char -> a) -> a -> Fold m Char a
forall (m :: * -> *) b a.
Monad m =>
(b -> a -> b) -> b -> Fold m a b
Fold.foldl' a -> Char -> a
forall a. (Bits a, Num a) => a -> Char -> a
step a
0)

    where

    isHexDigit :: Char -> Bool
isHexDigit Char
c =
           (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'f')
        Bool -> Bool -> Bool
|| (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'F')

    step :: a -> Char -> a
step a
a Char
c
        | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
48 Bool -> Bool -> Bool
&& Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
57 =
            (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
48)
        | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
97 =
            (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
87)
        | Bool
otherwise =
            (a
a a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
4) a -> a -> a
forall a. Bits a => a -> a -> a
.|. Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
w Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
55)

        where

        w :: Int
w = Char -> Int
ord Char
c

-- | Allow an optional leading @\'+\'@ or @\'-\'@ sign character before any
-- parser.
{-# INLINE signed #-}
signed :: (Num a, MonadCatch m) => Parser m Char a -> Parser m Char a
signed :: Parser m Char a -> Parser m Char a
signed Parser m Char a
p = (a -> a
forall a. Num a => a -> a
negate (a -> a) -> Parser m Char a -> Parser m Char a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Parser m Char Char
forall (m :: * -> *). MonadCatch m => Char -> Parser m Char Char
char Char
'-' Parser m Char Char -> Parser m Char a -> Parser m Char a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser m Char a
p)) Parser m Char a -> Parser m Char a -> Parser m Char a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser m Char Char
forall (m :: * -> *). MonadCatch m => Char -> Parser m Char Char
char Char
'+' Parser m Char Char -> Parser m Char a -> Parser m Char a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser m Char a
p) Parser m Char a -> Parser m Char a -> Parser m Char a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser m Char a
p

-- | Parse a 'Double'.
--
-- This parser accepts an optional leading sign character, followed by
-- at most one decimal digit.  The syntax is similar to that accepted by
-- the 'read' function, with the exception that a trailing @\'.\'@ is
-- consumed.
--
-- === Examples
--
-- Examples with behaviour identical to 'read', if you feed an empty
-- continuation to the first result:
--
-- > IS.parse double (IS.fromList "3")     == 3.0
-- > IS.parse double (IS.fromList "3.1")   == 3.1
-- > IS.parse double (IS.fromList "3e4")   == 30000.0
-- > IS.parse double (IS.fromList "3.1e4") == 31000.0
-- > IS.parse double (IS.fromList "3e")    == 30
--
-- Examples with behaviour identical to 'read':
--
-- > IS.parse (IS.fromList ".3")    == error "Parse failed"
-- > IS.parse (IS.fromList "e3")    == error "Parse failed"
--
-- Example of difference from 'read':
--
-- > IS.parse double (IS.fromList "3.foo") == 3.0
--
-- This function does not accept string representations of \"NaN\" or
-- \"Infinity\".
--
-- /Unimplemented/
double :: Parser m Char Double
double :: Parser m Char Double
double = Parser m Char Double
forall a. HasCallStack => a
undefined