{-# OPTIONS_HADDOCK show-extensions #-}
{-# OPTIONS_GHC -Wno-deprecations #-} --FIXME: remove when Text16 is removed
{-# LANGUAGE CPP,
             MagicHash,
             TypeFamilies,
             UnboxedTuples,
             StandaloneKindSignatures #-}
{-|
Module      : Parsley.Internal.Backend.Machine.InputRep
Description : Internal representations of input and miscellaneous operations.
License     : BSD-3-Clause
Maintainer  : Jamie Willis
Stability   : experimental

This module contains the translation from user input type to the
underlying parsley representation of it, as well as some miscellaneous
functions for working with specific types of input (these do not appear
in the rest of the machinery, but in "Parsley.Internal.Backend.Machine.InputOps"
and potentially the generated code).

@since 1.0.0.0
-}
module Parsley.Internal.Backend.Machine.InputRep (
    -- * Representation Type-Families
    DynRep, StaRep, RepKind,
    -- * @Int#@ Operations
    PartialStaOffset(..), dynOffset,
    intSame, intLess, intAdd, intSubNonNeg, min#, max#,
    -- * @Offwith@ Operations
    OffWith, offWithShiftRight,
    PartialStaOffWith(..), staOffWith,
    -- * @LazyByteString@ Operations
    UnpackedLazyByteString, emptyUnpackedLazyByteString,
    byteStringShiftRight, byteStringShiftLeft,
    -- * @Stream@ Operations
    dropStream,
    -- * @Text@ Operations
    StaText(..), PartialStaText(..), staText, offsetText, textShiftRight, textShiftLeft,
    -- * Crucial Exposed Functions
    {- |
    These functions must be exposed, since they can appear
    in the generated code.
    -}
    byteStringNext,
    textShiftRight#,  textShiftLeft#, byteStringShiftRight#, byteStringShiftLeft#,
    -- * Re-exports
    module Parsley.Internal.Core.InputTypes
  ) where

import Data.Array.Unboxed                (UArray)
import Data.ByteString.Internal          (ByteString(..))
import Data.Kind                         (Type)
import Data.Text.Internal                (Text(..))
import Data.Text.Unsafe                  (iter_, reverseIter_)
import GHC.Exts                          (Int(..), Char(..), TYPE, RuntimeRep(..), (==#), (<#), (+#), (-#), isTrue#)
#if __GLASGOW_HASKELL__ > 900
import GHC.Exts                          (LiftedRep)
#endif
import GHC.ForeignPtr                    (ForeignPtr(..), ForeignPtrContents)
import GHC.Prim                          (Int#, Addr#, nullAddr#, readWord8OffAddr#, word2Int#, chr#, touch#, realWorld#)
#if __GLASGOW_HASKELL__ > 900
import GHC.Prim                          (word8ToWord#)
#else
import GHC.Prim                          (Word#)
#endif
import Parsley.Internal.Common.Utils     (Code)
import Parsley.Internal.Core.InputTypes  (Text16(..), CharList(..), Stream(..))

import qualified Data.ByteString.Lazy.Internal as Lazy (ByteString(..))
import qualified Data.Text.Array               as Text (Array)

#if __GLASGOW_HASKELL__ <= 900
{-# INLINE word8ToWord# #-}
word8ToWord# :: Word# -> Word#
word8ToWord# x = x
#endif

{- Representation Types -}
{-|
This allows types like @String@ and @Stream@ to be manipulated
more efficiently by packaging them along with an offset which can
be used for quicker comparisons.

@since 1.0.0.0
-}
type OffWith ts = (# Int#, ts #)

data PartialStaOffWith ts = StaOW !PartialStaOffset !(Code ts) | DynOW !(Code (OffWith ts))

staOffWith :: PartialStaOffWith ts -> (PartialStaOffset -> Code ts -> Code a) -> Code a
staOffWith :: forall ts a.
PartialStaOffWith ts
-> (PartialStaOffset -> Code ts -> Code a) -> Code a
staOffWith (StaOW PartialStaOffset
po Code ts
qts) PartialStaOffset -> Code ts -> Code a
k = PartialStaOffset -> Code ts -> Code a
k PartialStaOffset
po Code ts
qts
staOffWith (DynOW Code (OffWith ts)
qots) PartialStaOffset -> Code ts -> Code a
k = [|| let !(# o, cs #) = $$qots in $$(k (StaO [||o||] 0) [||cs||]) ||]

{-|
This type unpacks /lazy/ `Lazy.ByteString`s for efficiency.

@since 1.0.0.0
-}
type UnpackedLazyByteString = (#
    Int#,
    Addr#,
    ForeignPtrContents,
    Int#,
    Int#,
    Lazy.ByteString
  #)

data PartialStaText = StaT {-# UNPACK #-} !StaText | DynT !(Code Text)

staText :: PartialStaText -> (StaText -> Code a) -> Code a
staText :: forall a. PartialStaText -> (StaText -> Code a) -> Code a
staText (StaT StaText
t) StaText -> Code a
k = StaText -> Code a
k StaText
t
staText (DynT Code Text
qt) StaText -> Code a
k = [||
    let !t@(Text arr off unconsumed) = $$qt
    in $$(k (StaText [||t||] [||arr||] [||off||] [||unconsumed||]))
  ||]

data StaText = StaText {
  StaText -> Code Text
origText       :: !(Code Text),
  StaText -> Code Array
arrText        :: !(Code Text.Array),
  StaText -> Code Int
offText        :: !(Code Int),
  StaText -> Code Int
unconsumedText :: !(Code Int)
}

data PartialStaOffset = StaO !(Code Int#) {-# UNPACK #-} !Int

dynOffset :: PartialStaOffset -> Code Int#
dynOffset :: PartialStaOffset -> Code Int#
dynOffset (StaO Code Int#
qi Int
0) = Code Int#
qi
dynOffset (StaO Code Int#
qi Int
n)
 | Int
n forall a. Ord a => a -> a -> Bool
> Int
0 = let !(I# Int#
n#) = Int
n in [||$$qi +# n#||]
 | Bool
otherwise = let !(I# Int#
m#) = forall a. Num a => a -> a
negate Int
n in [||$$qi -# m#||]

{-|
Initialises an `UnpackedLazyByteString` with a specified offset.
This offset varies as each lazy chunk is consumed.

@since 1.0.0.0
-}
emptyUnpackedLazyByteString :: Code Int# -> Code UnpackedLazyByteString
emptyUnpackedLazyByteString :: Code Int# -> Code UnpackedLazyByteString
emptyUnpackedLazyByteString Code Int#
qi# = [|| (# $$(qi#), nullAddr#, error "nullForeignPtr", 0#, 0#, Lazy.Empty #) ||]

{- Representation Mappings -}
-- NOTE: When a new input type is added here, it needs an Input instance in Parsley.Backend.Machine
{-|
The representation type of an input `Rep`, does not have to be a lifted type. To match a
representation of an input with the correct kind, this type family must be used.

@since 1.0.0.0
-}
type RepKind :: Type -> RuntimeRep
type family RepKind input where
  RepKind String = 'TupleRep '[IntRep, LiftedRep]
  RepKind (UArray Int Char) = IntRep
  RepKind Text16 = LiftedRep
  RepKind ByteString = IntRep
  RepKind Text = LiftedRep
  RepKind Lazy.ByteString = 'TupleRep '[IntRep, AddrRep, LiftedRep, IntRep, IntRep, LiftedRep]
  RepKind CharList = 'TupleRep '[IntRep, LiftedRep]
  RepKind Stream = 'TupleRep '[IntRep, LiftedRep]

{-|
This type family relates a user input type with the underlying parsley
representation, which is significantly more efficient to work with.
Most parts of the machine work with `Rep`.

@since 1.0.0.0
-}
type DynRep :: forall (rep :: Type) -> TYPE (RepKind rep)
type family DynRep input where
  DynRep String = (# Int#, String #)
  DynRep (UArray Int Char) = Int#
  DynRep Text16 = Text
  DynRep ByteString = Int#
  DynRep Text = Text
  DynRep Lazy.ByteString = UnpackedLazyByteString
  DynRep CharList = (# Int#, String #)
  DynRep Stream = (# Int#, Stream #)

type family StaRep input where
  StaRep String = PartialStaOffWith String
  StaRep (UArray Int Char) = PartialStaOffset
  StaRep Text16 = PartialStaText
  StaRep ByteString = PartialStaOffset
  StaRep Text = PartialStaText
  StaRep Lazy.ByteString = Code UnpackedLazyByteString --TODO: could refine
  StaRep CharList = PartialStaOffWith String
  StaRep Stream = PartialStaOffWith Stream

{- Generic Representation Operations -}
{-|
Verifies that two `Int#`s are equal.

@since 1.0.0.0
-}
intSame :: Code Int# -> Code Int# -> Code Bool
intSame :: Code Int# -> Code Int# -> Code Bool
intSame Code Int#
qi# Code Int#
qj# = [||isTrue# ($$(qi#) ==# $$(qj#))||]

{-|
Is the first argument is less than the second?

@since 2.3.0.0
-}
intLess :: Code Int# -> Code Int# -> Code a -> Code a -> Code a
intLess :: forall a. Code Int# -> Code Int# -> Code a -> Code a -> Code a
intLess Code Int#
qi# Code Int#
qj# Code a
yes Code a
no = [||
    case $$(qi#) <# $$(qj#) of
      1# -> $$yes
      0# -> $$no
  ||]

intAdd ::  PartialStaOffset -> Int -> PartialStaOffset
intAdd :: PartialStaOffset -> Int -> PartialStaOffset
intAdd (StaO Code Int#
qo Int
n) Int
i = Code Int# -> Int -> PartialStaOffset
StaO Code Int#
qo (Int
n forall a. Num a => a -> a -> a
+ Int
i)

intSubNonNeg ::  PartialStaOffset -> Int -> PartialStaOffset
intSubNonNeg :: PartialStaOffset -> Int -> PartialStaOffset
intSubNonNeg (StaO Code Int#
qo Int
n) Int
i
  | Int
n forall a. Ord a => a -> a -> Bool
>= Int
i = Code Int# -> Int -> PartialStaOffset
StaO Code Int#
qo (Int
n forall a. Num a => a -> a -> a
- Int
i)
  | Bool
otherwise = let !(I# Int#
m#) = forall a. Num a => a -> a
negate (Int
n forall a. Num a => a -> a -> a
- Int
i) in Code Int# -> Int -> PartialStaOffset
StaO [||max# ($$qo -# m#) 0#||] Int
0

{-|
Extracts the offset from `Text`.

@since 1.0.0.0
-}
-- FIXME: not accurate? this can be slow without consequence
offsetText :: PartialStaText -> Code Int
offsetText :: PartialStaText -> Code Int
offsetText PartialStaText
pt = forall a. PartialStaText -> (StaText -> Code a) -> Code a
staText PartialStaText
pt StaText -> Code Int
offText

{-|
Shifts an `OffWith` to the right, taking care to also drop tokens from the
companion input.

@since 1.0.0.0
-}
offWithShiftRight :: Code (Int -> ts -> ts)        -- ^ A @drop@ function for underlying input.
                  -> PartialStaOffset -> Code ts   -- ^ The `OffWith` to shift.
                  -> Int                           -- ^ How much to shift by.
                  -> (PartialStaOffset, Code ts)
offWithShiftRight :: forall ts.
Code (Int -> ts -> ts)
-> PartialStaOffset
-> Code ts
-> Int
-> (PartialStaOffset, Code ts)
offWithShiftRight Code (Int -> ts -> ts)
_ PartialStaOffset
po Code ts
qts Int
0 = (PartialStaOffset -> Int -> PartialStaOffset
intAdd PartialStaOffset
po Int
0, Code ts
qts)
offWithShiftRight Code (Int -> ts -> ts)
drop PartialStaOffset
po Code ts
qts Int
n = (PartialStaOffset -> Int -> PartialStaOffset
intAdd PartialStaOffset
po Int
n, [|| $$drop n $$qts ||])

{-|
Drops tokens off of a `Stream`.

@since 1.0.0.0
-}
dropStream :: Int -> Stream -> Stream
dropStream :: Int -> Stream -> Stream
dropStream Int
0 Stream
cs = Stream
cs
dropStream Int
n (Char
_ :> Stream
cs) = Int -> Stream -> Stream
dropStream (Int
nforall a. Num a => a -> a -> a
-Int
1) Stream
cs

{-|
Drops tokens off of `Text`.

@since 2.3.0.0
-}
textShiftRight :: Code Text -> Int# -> Code Text
textShiftRight :: Code Text -> Int# -> Code Text
textShiftRight Code Text
t Int#
0# = Code Text
t
textShiftRight Code Text
t Int#
n = [||textShiftRight# $$t n||]

{-# INLINABLE textShiftRight# #-}
textShiftRight# :: Text -> Int# -> Text
textShiftRight# :: Text -> Int# -> Text
textShiftRight# (Text Array
arr Int
off Int
unconsumed) Int#
i = Int# -> Array -> Int -> Int -> Text
go Int#
i Array
arr Int
off Int
unconsumed
  where
    go :: Int# -> Array -> Int -> Int -> Text
go Int#
0# !Array
arr !Int
off !Int
unconsumed = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
unconsumed
    go Int#
n !Array
arr !Int
off !Int
unconsumed
      | Int
unconsumed forall a. Ord a => a -> a -> Bool
> Int
0 = let !d :: Int
d = Text -> Int -> Int
iter_ (Array -> Int -> Int -> Text
Text Array
arr Int
off Int
unconsumed) Int
0
                         in Int# -> Array -> Int -> Int -> Text
go (Int#
n Int# -> Int# -> Int#
-# Int#
1#) Array
arr (Int
off forall a. Num a => a -> a -> a
+ Int
d) (Int
unconsumed forall a. Num a => a -> a -> a
- Int
d)
      | Bool
otherwise = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
0

{-|
Rewinds input consumption on `Text` where the input is still available (i.e. in the same chunk).

@since 2.3.0.0
-}
textShiftLeft :: Code Text -> Int# -> Code Text
textShiftLeft :: Code Text -> Int# -> Code Text
textShiftLeft Code Text
t Int#
0# = Code Text
t
textShiftLeft Code Text
t Int#
n = [||textShiftLeft# $$t n||]

textShiftLeft# :: Text -> Int# -> Text
textShiftLeft# :: Text -> Int# -> Text
textShiftLeft# (Text Array
arr Int
off Int
unconsumed) Int#
i = Int# -> Int -> Int -> Text
go Int#
i Int
off Int
unconsumed
  where
    go :: Int# -> Int -> Int -> Text
go Int#
0# Int
off' Int
unconsumed' = Array -> Int -> Int -> Text
Text Array
arr Int
off' Int
unconsumed'
    go Int#
n Int
off' Int
unconsumed'
      | Int
off' forall a. Ord a => a -> a -> Bool
> Int
0 = let !d :: Int
d = Text -> Int -> Int
reverseIter_ (Array -> Int -> Int -> Text
Text Array
arr Int
off' Int
unconsumed') Int
0 in Int# -> Int -> Int -> Text
go (Int#
n Int# -> Int# -> Int#
-# Int#
1#) (Int
off' forall a. Num a => a -> a -> a
+ Int
d) (Int
unconsumed' forall a. Num a => a -> a -> a
- Int
d)
      | Bool
otherwise = Array -> Int -> Int -> Text
Text Array
arr Int
0 Int
unconsumed'

{-# INLINE emptyUnpackedLazyByteString' #-}
emptyUnpackedLazyByteString' :: Int# -> UnpackedLazyByteString
emptyUnpackedLazyByteString' :: Int# -> UnpackedLazyByteString
emptyUnpackedLazyByteString' Int#
i# = (# Int#
i#, Addr#
nullAddr#, forall a. HasCallStack => [Char] -> a
error [Char]
"nullForeignPtr", Int#
0#, Int#
0#, ByteString
Lazy.Empty #)

{-# INLINABLE byteStringNext #-}
byteStringNext :: UnpackedLazyByteString -> (# Char, UnpackedLazyByteString #)
byteStringNext :: UnpackedLazyByteString -> (# Char, UnpackedLazyByteString #)
byteStringNext (# Int#
i#, Addr#
addr#, ForeignPtrContents
final, Int#
off#, Int#
size#, ByteString
cs #) =
  case forall d. Addr# -> Int# -> State# d -> (# State# d, Word8# #)
readWord8OffAddr# Addr#
addr# Int#
off# State# RealWorld
realWorld# of
    (# State# RealWorld
s', Word8#
x #) -> case touch# :: forall a. a -> State# RealWorld -> State# RealWorld
touch# ForeignPtrContents
final State# RealWorld
s' of
      State# RealWorld
_ -> (# Char# -> Char
C# (Int# -> Char#
chr# (Word# -> Int#
word2Int# (Word8# -> Word#
word8ToWord# Word8#
x))),
          if Int# -> Int
I# Int#
size# forall a. Eq a => a -> a -> Bool
/= Int
1 then (# Int#
i# Int# -> Int# -> Int#
+# Int#
1#, Addr#
addr#, ForeignPtrContents
final, Int#
off# Int# -> Int# -> Int#
+# Int#
1#, Int#
size# Int# -> Int# -> Int#
-# Int#
1#, ByteString
cs #)
          else case ByteString
cs of
            Lazy.Chunk (PS (ForeignPtr Addr#
addr'# ForeignPtrContents
final') (I# Int#
off'#) (I# Int#
size'#)) ByteString
cs' ->
              (# Int#
i# Int# -> Int# -> Int#
+# Int#
1#, Addr#
addr'#, ForeignPtrContents
final', Int#
off'#, Int#
size'#, ByteString
cs' #)
            ByteString
Lazy.Empty -> Int# -> UnpackedLazyByteString
emptyUnpackedLazyByteString' (Int#
i# Int# -> Int# -> Int#
+# Int#
1#)
        #)

{-|
Drops tokens off of a lazy `Lazy.ByteString`.

@since 2.3.0.0
-}
byteStringShiftRight :: Code UnpackedLazyByteString -> Int# -> Code UnpackedLazyByteString
byteStringShiftRight :: Code UnpackedLazyByteString -> Int# -> Code UnpackedLazyByteString
byteStringShiftRight Code UnpackedLazyByteString
t Int#
0# = Code UnpackedLazyByteString
t
byteStringShiftRight Code UnpackedLazyByteString
t Int#
n = [||byteStringShiftRight# $$t n||]

{-# INLINABLE byteStringShiftRight# #-}
byteStringShiftRight# :: UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftRight# :: UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftRight# (# Int#
i#, Addr#
addr#, ForeignPtrContents
final, Int#
off#, Int#
size#, ByteString
cs #) Int#
j#
  | Int# -> Bool
isTrue# (Int#
j# Int# -> Int# -> Int#
<# Int#
size#)  = (# Int#
i# Int# -> Int# -> Int#
+# Int#
j#, Addr#
addr#, ForeignPtrContents
final, Int#
off# Int# -> Int# -> Int#
+# Int#
j#, Int#
size# Int# -> Int# -> Int#
-# Int#
j#, ByteString
cs #)
  | Bool
otherwise = case ByteString
cs of
    Lazy.Chunk (PS (ForeignPtr Addr#
addr'# ForeignPtrContents
final') (I# Int#
off'#) (I# Int#
size'#)) ByteString
cs' -> UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftRight# (# Int#
i# Int# -> Int# -> Int#
+# Int#
size#, Addr#
addr'#, ForeignPtrContents
final', Int#
off'#, Int#
size'#, ByteString
cs' #) (Int#
j# Int# -> Int# -> Int#
-# Int#
size#)
    ByteString
Lazy.Empty -> Int# -> UnpackedLazyByteString
emptyUnpackedLazyByteString' (Int#
i# Int# -> Int# -> Int#
+# Int#
size#)

{-|
Rewinds input consumption on a lazy `Lazy.ByteString` if input is still available (within the same chunk).

@since 2.3.0.0
-}
byteStringShiftLeft :: Code UnpackedLazyByteString -> Int# -> Code UnpackedLazyByteString
byteStringShiftLeft :: Code UnpackedLazyByteString -> Int# -> Code UnpackedLazyByteString
byteStringShiftLeft Code UnpackedLazyByteString
t Int#
0# = Code UnpackedLazyByteString
t
byteStringShiftLeft Code UnpackedLazyByteString
t Int#
n = [||byteStringShiftLeft# $$t n||]

{-# INLINABLE byteStringShiftLeft# #-}
byteStringShiftLeft# :: UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftLeft# :: UnpackedLazyByteString -> Int# -> UnpackedLazyByteString
byteStringShiftLeft# (# Int#
i#, Addr#
addr#, ForeignPtrContents
final, Int#
off#, Int#
size#, ByteString
cs #) Int#
j# =
  let d# :: Int#
d# = Int# -> Int# -> Int#
min# Int#
off# Int#
j#
  in (# Int#
i# Int# -> Int# -> Int#
-# Int#
d#, Addr#
addr#, ForeignPtrContents
final, Int#
off# Int# -> Int# -> Int#
-# Int#
d#, Int#
size# Int# -> Int# -> Int#
+# Int#
d#, ByteString
cs #)

{-|
Finds the minimum of two `Int#` values.

@since 1.0.0.0
-}
{-# INLINABLE min# #-}
min# :: Int# -> Int# -> Int#
min# :: Int# -> Int# -> Int#
min# Int#
i# Int#
j# = case Int#
i# Int# -> Int# -> Int#
<# Int#
j# of
  Int#
0# -> Int#
j#
  Int#
_  -> Int#
i#

{-|
Finds the maximum of two `Int#` values.

@since 1.0.0.0
-}
{-# INLINABLE max# #-}
max# :: Int# -> Int# -> Int#
max# :: Int# -> Int# -> Int#
max# Int#
i# Int#
j# = case Int#
i# Int# -> Int# -> Int#
<# Int#
j# of
  Int#
0# -> Int#
i#
  Int#
_  -> Int#
j#