{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language DataKinds #-}
{-# language DeriveFunctor #-}
{-# language DerivingStrategies #-}
{-# language GADTSyntax #-}
{-# language KindSignatures #-}
{-# language LambdaCase #-}
{-# language MagicHash #-}
{-# language MultiWayIf #-}
{-# language PolyKinds #-}
{-# language RankNTypes #-}
{-# language ScopedTypeVariables #-}
{-# language StandaloneDeriving #-}
{-# language TypeApplications #-}
{-# language UnboxedSums #-}
{-# language UnboxedTuples #-}
module Data.Bytes.Parser.Ascii
(
Latin.char
, Latin.char2
, Latin.char3
, Latin.char4
, any
, any#
, peek
, opt
, shortTrailedBy
, takeShortWhile
, Latin.skipDigits
, Latin.skipDigits1
, Latin.skipChar
, Latin.skipChar1
, skipAlpha
, skipAlpha1
, skipTrailedBy
, skipWhile
, Latin.decWord
, Latin.decWord8
, Latin.decWord16
, Latin.decWord32
) where
import Prelude hiding (length,any,fail,takeWhile)
import Data.Bytes.Types (Bytes(..))
import Data.Bytes.Parser.Internal (Parser(..),uneffectful,Result#,uneffectful#)
import Data.Bytes.Parser.Internal (InternalResult(..),indexLatinCharArray,upcastUnitSuccess)
import Data.Word (Word8)
import Data.Text.Short (ShortText)
import Control.Monad.ST.Run (runByteArrayST)
import GHC.Exts (Int(I#),Char(C#),Int#,Char#,(-#),(+#),(<#),ord#,indexCharArray#,chr#)
import GHC.Exts (gtChar#)
import qualified Data.ByteString.Short.Internal as BSS
import qualified Data.Text.Short.Unsafe as TS
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.Bytes.Parser.Unsafe as Unsafe
import qualified Data.Primitive as PM
skipTrailedBy :: e -> Char -> Parser e s ()
skipTrailedBy e !c = do
let go = do
!d <- any e
if d == c
then pure ()
else go
go
takeShortWhile :: (Char -> Bool) -> Parser e s ShortText
{-# inline takeShortWhile #-}
takeShortWhile p = do
!start <- Unsafe.cursor
skipWhile p
end <- Unsafe.cursor
src <- Unsafe.expose
let len = end - start
!r = runByteArrayST $ do
marr <- PM.newByteArray len
PM.copyByteArray marr 0 src start len
PM.unsafeFreezeByteArray marr
pure
$ TS.fromShortByteStringUnsafe
$ byteArrayToShortByteString
$ r
shortTrailedBy :: e -> Char -> Parser e s ShortText
shortTrailedBy e !c = do
!start <- Unsafe.cursor
skipTrailedBy e c
end <- Unsafe.cursor
src <- Unsafe.expose
let len = end - start - 1
!r = runByteArrayST $ do
marr <- PM.newByteArray len
PM.copyByteArray marr 0 src start len
PM.unsafeFreezeByteArray marr
pure
$ TS.fromShortByteStringUnsafe
$ byteArrayToShortByteString
$ r
any :: e -> Parser e s Char
any e = uneffectful $ \chunk -> if length chunk > 0
then
let c = indexLatinCharArray (array chunk) (offset chunk)
in if c < '\128'
then InternalSuccess c (offset chunk + 1) (length chunk - 1)
else InternalFailure e
else InternalFailure e
any# :: e -> Parser e s Char#
{-# inline any# #-}
any# e = Parser
(\(# arr, off, len #) s0 -> case len of
0# -> (# s0, (# e | #) #)
_ ->
let !w = indexCharArray# arr off
in case ord# w <# 128# of
1# -> (# s0, (# | (# w, off +# 1#, len -# 1# #) #) #)
_ -> (# s0, (# e | #) #)
)
unI :: Int -> Int#
unI (I# w) = w
peek :: e -> Parser e s Char
peek e = uneffectful $ \chunk -> if length chunk > 0
then
let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
in if w < 128
then InternalSuccess
(C# (chr# (unI (fromIntegral w))))
(offset chunk)
(length chunk)
else InternalFailure e
else InternalFailure e
opt :: e -> Parser e s (Maybe Char)
{-# inline opt #-}
opt e = uneffectful $ \chunk -> if length chunk > 0
then
let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8
in if w < 128
then InternalSuccess
(Just (C# (chr# (unI (fromIntegral w)))))
(offset chunk + 1)
(length chunk - 1)
else InternalFailure e
else InternalSuccess Nothing (offset chunk) (length chunk)
skipWhile :: (Char -> Bool) -> Parser e s ()
{-# inline skipWhile #-}
skipWhile p = Parser
( \(# arr, off0, len0 #) s0 ->
let go off len = case len of
0# -> (# (), off, 0# #)
_ -> let c = indexCharArray# arr off in
case p (C# c) of
True -> case gtChar# c '\x7F'# of
1# -> (# (), off, len #)
_ -> go (off +# 1# ) (len -# 1# )
False -> (# (), off, len #)
in (# s0, (# | go off0 len0 #) #)
)
skipAlpha :: Parser e s ()
skipAlpha = uneffectful# $ \c ->
upcastUnitSuccess (skipAlphaAsciiLoop c)
skipAlpha1 :: e -> Parser e s ()
skipAlpha1 e = uneffectful# $ \c ->
skipAlphaAsciiLoop1Start e c
skipAlphaAsciiLoop ::
Bytes
-> (# Int#, Int# #)
skipAlphaAsciiLoop !c = if length c > 0
then
let w = indexLatinCharArray (array c) (offset c)
in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z')
then skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c)
else (# unI (offset c), unI (length c) #)
else (# unI (offset c), unI (length c) #)
skipAlphaAsciiLoop1Start ::
e
-> Bytes
-> Result# e ()
skipAlphaAsciiLoop1Start e !c = if length c > 0
then
let w = indexLatinCharArray (array c) (offset c)
in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z')
then upcastUnitSuccess (skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c))
else (# e | #)
else (# e | #)
byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString
byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x