module Sasha (
    -- * Sasha the lexer

    -- | This is the ordinary Haskell (i.e. slow) interface.
    --
    -- The fast one is in the "Sasha.TTH" module, but that requires @TemplateHaskell@.
    --
    Sasha,
    sasha,
    -- * ERE specification
    ERE,
    empty,
    eps,
    char,
    charRange,
    charSet,
    utf8Char,
    anyChar,
    anyUtf8Char,
    appends,
    unions,
    intersections,
    star,
    plus,
    string,
    utf8String,
    complement,
    satisfy,
    digit,
) where

import Data.Word (Word8)

import qualified Data.ByteString as BS

import Sasha.Internal.ERE

-- | Lexer grammar specification: a list of rules, each pairing a regular
-- expression ('ERE') with a result builder function. The builder takes
-- a prefix (the matching part) and a suffix (the rest of input).
--
-- When scanned with 'sasha', the longest matching prefix wins; among rules
-- matching a prefix of the same length, the rule listed first wins.
type Sasha r = [(ERE, BS.ByteString -> BS.ByteString -> r)]

-- | Scan for a single token.
--
-- The input is consumed byte by byte, replacing every rule by its derivative
-- with respect to the consumed byte and dropping rules that can no longer
-- match. The longest matching prefix wins; when several rules accept a prefix
-- of the same length, the rule listed first wins. Only prefixes of at least
-- one byte are considered, so the no-match value is returned when no rule
-- accepts any non-empty prefix of the input.
sasha
    :: forall r. r    -- ^ no match value
    -> Sasha r        -- ^ scanner rules definitions
    -> BS.ByteString  -- ^ input
    -> r              -- ^ result
sasha noMatch grammar input0 = go noMatch 0 input0 grammar
  where
    -- Note: acc has to be lazy, so that the result of a shorter match which
    -- is later superseded by a longer one is never computed.
    --
    -- Which rule wins at the current position is nevertheless decided
    -- eagerly (only the spine of the candidate list is forced, never the
    -- result value itself). Deferring that decision would pile up one thunk
    -- per consumed byte, each retaining the rule set of its step.
    go :: r -> Int -> BS.ByteString -> Sasha r -> r
    go acc !_ !_     [] = acc  -- no rule can match any longer input
    go acc !i !input ts = case BS.uncons input of
        Nothing          -> acc
        Just (c, input') -> case [ f | (ere, f) <- ts', nullable ere ] of
            []      -> go acc         i' input' ts'
            (f : _) -> go (matchAt f) i' input' ts'
          where
            -- length of the prefix consumed so far, including c
            i'  = i + 1
            ts' = derivativeSasha c ts

            -- result of a rule accepting the first i' bytes of the input
            matchAt f = case BS.splitAt i' input0 of (pfx, sfx) -> f pfx sfx

-- | Advance every rule of the grammar by one input byte.
--
-- Each rule's expression is replaced by its 'derivative' with respect to the
-- given byte; rules whose derivative 'isEmpty' are dropped. The relative
-- order of the surviving rules is preserved.
derivativeSasha :: Word8 -> Sasha r -> Sasha r
derivativeSasha c = foldr step []
  where
    step (ere, f) rest
        | isEmpty ere' = rest
        | otherwise    = (ere', f) : rest
      where
        ere' = derivative c ere