{-# LANGUAGE OverloadedStrings, UnboxedTuples, CPP #-}
{-# LANGUAGE Trustworthy #-}

-- |
-- Module      : Data.Text.Read
-- Copyright   : (c) 2010, 2011 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Functions used frequently when reading textual data.
module Data.Text.Read
    (
      Reader
    , decimal
    , hexadecimal
    , signed
    , rational
    , double
    ) where

import Control.Monad (liftM)
import Data.Char (ord)
import Data.Int (Int8, Int16, Int32, Int64)
import Data.Ratio ((%))
import Data.Text as T
import Data.Text.Internal as T (Text(..))
import Data.Text.Array as A
import Data.Text.Internal.Private (spanAscii_)
import Data.Text.Internal.Read
import Data.Word (Word, Word8, Word16, Word32, Word64)

-- | Read some text.  If the read succeeds, return its value and the
-- remaining text, otherwise an error message.
--
-- Concretely, a successful read is @Right (value, rest)@, where @rest@
-- is the second component of the pair, and a failed read is
-- @Left message@.
type Reader a = Text -> Either String (a, Text)
-- Internal shorthand (not in the export list): 'IParser' with its input
-- type fixed to 'Text'.
type Parser a = IParser Text a

-- | Read a decimal integer.  The input must begin with at least one
-- decimal digit, and is consumed until a non-digit or end of string
-- is reached.
--
-- On success the result is @Right (value, rest)@; if the input does not
-- begin with a digit the result is
-- @Left \"input does not start with a digit\"@.
--
-- This function does not handle leading sign characters.  If you need
-- to handle signed input, use @'signed' 'decimal'@.
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
-- incorrect results.  If you are worried about overflow, use
-- 'Integer' for your result type.
decimal :: Integral a => Reader a
{-# SPECIALIZE decimal :: Reader Int #-}
{-# SPECIALIZE decimal :: Reader Int8 #-}
{-# SPECIALIZE decimal :: Reader Int16 #-}
{-# SPECIALIZE decimal :: Reader Int32 #-}
{-# SPECIALIZE decimal :: Reader Int64 #-}
{-# SPECIALIZE decimal :: Reader Integer #-}
{-# SPECIALIZE decimal :: Reader Data.Word.Word #-}
{-# SPECIALIZE decimal :: Reader Word8 #-}
{-# SPECIALIZE decimal :: Reader Word16 #-}
{-# SPECIALIZE decimal :: Reader Word32 #-}
{-# SPECIALIZE decimal :: Reader Word64 #-}
decimal txt
    | T.null h  = Left "input does not start with a digit"
    | otherwise = Right (T.foldl' go 0 h, t)
  where
    -- Split the leading run of ASCII digits (h) from the rest (t).
    -- The subtraction is performed on 'Word8' and wraps around, so a
    -- byte below '0' turns into a large value and one comparison is
    -- enough to test the whole '0'..'9' range.
    (# h, t #) = spanAscii_ (\w -> w - ord8 '0' < 10) txt
    -- Fold the digits most-significant first.  No local type signature:
    -- the file does not enable ScopedTypeVariables, so one could not
    -- refer to the outer result type.
    go n d = n * 10 + fromIntegral (digitToInt d)

-- | Read a hexadecimal integer, consisting of an optional leading
-- @\"0x\"@ followed by at least one hexadecimal digit. Input is
-- consumed until a non-hex-digit or end of string is reached.
-- This function is case insensitive.
--
-- When the @\"0x\"@ / @\"0X\"@ prefix is present, only the text that
-- follows it is handed to the digit reader; otherwise the whole input
-- is.
--
-- This function does not handle leading sign characters.  If you need
-- to handle signed input, use @'signed' 'hexadecimal'@.
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
-- incorrect results.  If you are worried about overflow, use
-- 'Integer' for your result type.
hexadecimal :: Integral a => Reader a
{-# SPECIALIZE hexadecimal :: Reader Int #-}
{-# SPECIALIZE hexadecimal :: Reader Int8 #-}
{-# SPECIALIZE hexadecimal :: Reader Int16 #-}
{-# SPECIALIZE hexadecimal :: Reader Int32 #-}
{-# SPECIALIZE hexadecimal :: Reader Int64 #-}
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
{-# SPECIALIZE hexadecimal :: Reader Word #-}
{-# SPECIALIZE hexadecimal :: Reader Word8 #-}
{-# SPECIALIZE hexadecimal :: Reader Word16 #-}
{-# SPECIALIZE hexadecimal :: Reader Word32 #-}
{-# SPECIALIZE hexadecimal :: Reader Word64 #-}
hexadecimal txt
    | h == "0x" || h == "0X" = hex t
    | otherwise              = hex txt
  where
    -- Peel off the first two characters to test for the optional prefix.
    (h, t) = T.splitAt 2 txt

-- | Read a run of hexadecimal digits (no @\"0x\"@ prefix handling; that
-- is done by 'hexadecimal').  Both upper- and lower-case letters
-- @a@-@f@ are accepted.
--
-- On success the result is @Right (value, rest)@; if the input does not
-- begin with a hexadecimal digit the result is
-- @Left \"input does not start with a hexadecimal digit\"@.  As with
-- 'decimal', overflow of fixed-width result types is not detected.
hex :: Integral a => Reader a
{-# SPECIALIZE hex :: Reader Int #-}
{-# SPECIALIZE hex :: Reader Int8 #-}
{-# SPECIALIZE hex :: Reader Int16 #-}
{-# SPECIALIZE hex :: Reader Int32 #-}
{-# SPECIALIZE hex :: Reader Int64 #-}
{-# SPECIALIZE hex :: Reader Integer #-}
{-# SPECIALIZE hex :: Reader Word #-}
{-# SPECIALIZE hex :: Reader Word8 #-}
{-# SPECIALIZE hex :: Reader Word16 #-}
{-# SPECIALIZE hex :: Reader Word32 #-}
{-# SPECIALIZE hex :: Reader Word64 #-}
hex txt
    | T.null h  = Left "input does not start with a hexadecimal digit"
    | otherwise = Right (T.foldl' go 0 h, t)
  where
    -- Split the leading run of hex digits (h) from the rest (t).
    (# h, t #) = spanAscii_ isHexByte txt
    -- Each range test relies on 'Word8' subtraction wrapping around:
    -- a byte below the range start becomes a large value, so a single
    -- comparison covers '0'..'9', 'A'..'F' and 'a'..'f' respectively.
    isHexByte w = w - ord8 '0' < 10
               || w - ord8 'A' < 6
               || w - ord8 'a' < 6
    -- Fold the digits most-significant first.  No local type signature:
    -- the file does not enable ScopedTypeVariables.
    go n d = n * 16 + fromIntegral (hexDigitToInt d)

-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
-- apply it to the result of applying the given reader.
--
-- The given reader is wrapped as a parser with 'P', combined with the
-- sign handling in 'signa', and unwrapped again with 'runP'; a leading
-- @\'-\'@ causes the reader's result to be negated.
signed :: Num a => Reader a -> Reader a
{-# INLINE signed #-}
signed f = runP (signa (P f))

-- | Read a rational number.
--
-- This function accepts an optional leading sign character, followed
-- by at least one decimal digit.  The syntax is similar to that accepted
-- by the 'read' function, with the exception that a trailing @\'.\'@
-- or @\'e\'@ /not/ followed by a number is not consumed.
--
-- Examples (with behaviour identical to 'read'):
--
-- >rational "3"     == Right (3.0, "")
-- >rational "3.1"   == Right (3.1, "")
-- >rational "3e4"   == Right (30000.0, "")
-- >rational "3.1e4" == Right (31000.0, "")
-- >rational ".3"    == Left "input does not start with a digit"
-- >rational "e3"    == Left "input does not start with a digit"
--
-- Examples of differences from 'read':
--
-- >rational "3.foo" == Right (3.0, ".foo")
-- >rational "3e"    == Right (3.0, "e")
rational :: Fractional a => Reader a
{-# SPECIALIZE rational :: Reader Double #-}
rational = floaty $ \real frac fracDenom ->
    -- Combine the integer part and the fraction exactly as a 'Rational'
    -- before converting to the requested 'Fractional' type.
    fromRational $ real % 1 + frac % fracDenom

-- | Read a rational number.
--
-- The syntax accepted by this function is the same as for 'rational'.
--
-- /Note/: This function is almost ten times faster than 'rational',
-- but is slightly less accurate.
--
-- The 'Double' type supports about 16 decimal places of accuracy.
-- For 94.2% of numbers, this function and 'rational' give identical
-- results, but for the remaining 5.8%, this function loses precision
-- around the 15th decimal place.  For 0.001% of numbers, this
-- function will lose precision at the 13th or 14th decimal place.
double :: Reader Double
double = floaty $ \real frac fracDenom ->
    -- Unlike 'rational', the parts are converted to 'Double' first and
    -- combined with floating-point arithmetic.
    fromInteger real + fromInteger frac / fromInteger fracDenom

-- | Parser-level sign handling used by 'signed' and by the exponent
-- part of 'floaty'.
--
-- Tries to read one @\'-\'@ or @\'+\'@ byte, with @\'+\'@ supplied to
-- 'perhaps' as the value to use when no sign is read, then runs the
-- given parser.  The parser's result is returned unchanged for @\'+\'@
-- and negated for @\'-\'@.
signa :: Num a => Parser a -> Parser a
{-# SPECIALIZE signa :: Parser Int -> Parser Int #-}
{-# SPECIALIZE signa :: Parser Int8 -> Parser Int8 #-}
{-# SPECIALIZE signa :: Parser Int16 -> Parser Int16 #-}
{-# SPECIALIZE signa :: Parser Int32 -> Parser Int32 #-}
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
signa p = do
  sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
  if sign == ord8 '+' then p else negate `liftM` p

-- | Parse a single leading array element (a 'Word8') that satisfies the
-- given predicate.
--
-- On success, returns the matched element together with the input
-- advanced by one element (offset plus one, length minus one).  If the
-- input is empty or the predicate rejects the first element, fails with
-- @Left \"character does not match\"@.
charAscii :: (Word8 -> Bool) -> Parser Word8
charAscii p = P $ \(Text arr off len) ->
  -- 'c' is bound lazily and the length test comes first in the '&&', so
  -- the unchecked index is never forced on empty input.
  let c = A.unsafeIndex arr off in
  if len > 0 && p c
  then Right (c, Text arr (off + 1) (len - 1))
  else Left "character does not match"

-- | Shared implementation of 'rational' and 'double'.
--
-- Parses, in order: an optional sign, a mandatory run of decimal
-- digits, an optional @\'.\'@ followed by digits, and an optional
-- @\'e\'@ / @\'E\'@ followed by an optionally signed decimal exponent.
--
-- The function argument combines the pieces when a fractional part is
-- present: it is called as @f real fraction (10 ^ fracDigits)@, i.e.
-- with the integer part, the fractional digits read as an integer, and
-- the power of ten those digits must be divided by.
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
{-# INLINE floaty #-}
floaty f = runP $ do
  sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
  real <- P decimal
  -- Optional fractional part; @T 0 0@ is the value given to 'perhaps'
  -- for the case where no fractional part is read.
  T fraction fracDigits <- perhaps (T 0 0) $ do
    _ <- charAscii (== ord8 '.')
    -- Count the fractional digits without consuming them (the input is
    -- handed back unchanged).  The count is needed separately from the
    -- value because leading zeros ("3.05") affect the denominator.
    digits <- P $ \t ->
      let (# hd, _ #) = spanAscii_ (\w -> w - ord8 '0' < 10) t
      in Right (T.length hd, t)
    -- Now actually consume the same digits to obtain their value.
    n <- P decimal
    return $ T n digits
  let e c = c == ord8 'e' || c == ord8 'E'
  power <- perhaps 0 (charAscii e >> signa (P decimal) :: Parser Int)
  -- Skip the work that is not needed: no call to @f@ without fractional
  -- digits, and no scaling when the exponent is zero.
  let n = if fracDigits == 0
          then if power == 0
               then fromInteger real
               else fromInteger real * (10 ^^ power)
          else if power == 0
               then f real fraction (10 ^ fracDigits)
               else f real fraction (10 ^ fracDigits) * (10 ^^ power)
  -- '$!' forces the selected result (to weak head normal form) as part
  -- of running the parser.
  return $! if sign == ord8 '+'
            then n
            else -n

-- | Convert a 'Char' to a 'Word8' via its code point.
--
-- 'fromIntegral' truncates, so code points above 255 wrap modulo 256;
-- the uses visible in this module only pass ASCII character literals.
ord8 :: Char -> Word8
ord8 = fromIntegral . ord