{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Regex.Base.RegexLike
-- Copyright   :  (c) Chris Kuklewicz 2006
-- License     :  BSD-style (see the file LICENSE)
-- 
-- Maintainer  :  libraries@haskell.org, textregexlazy@personal.mightyreason.com
-- Stability   :  experimental
-- Portability :  non-portable (MPTC+FD)
--
-- Classes and instances for Regex matching.
--
-- 
-- All the classes are declared here, and some common type aliases, and
-- the MatchResult data type.
-- 
-- The only instances here are for Extract String and Extract ByteString.
-- There are no data values.  The 'RegexContext' instances are in
-- "Text.Regex.Base.Context", except for ones which run afoul of a
-- repeated variable (RegexContext regex a a), which are defined in each
-- modules' String and ByteString modules.
-----------------------------------------------------------------------------

module Text.Regex.Base.RegexLike (
  -- ** Type aliases
  MatchOffset,
  MatchLength,
  MatchArray,
  MatchText,
  -- ** Data types
  MatchResult(..),
  -- ** Classes
  RegexOptions(..),
  RegexMaker(..),
  RegexLike(..),
  RegexContext(..),
  Extract(..),
  AllSubmatches(..),AllTextSubmatches(..),AllMatches(..),AllTextMatches(..)
  ) where

import Data.Array(Array,(!))
import Data.Maybe(isJust)
import qualified Data.ByteString as B (take,drop,empty,ByteString)
import qualified Data.ByteString.Lazy as L (take,drop,empty,ByteString)
import qualified Data.Sequence as S(take,drop,empty,Seq)

-- | 0 based index from start of source, or (-1) for unused
type MatchOffset = Int
-- | non-negative length of a match
type MatchLength = Int
-- | 0 based array, with 0th index indicating the full match.  If the
-- full match location is not available, represent as (0,0).
type MatchArray = Array Int (MatchOffset,MatchLength)
type MatchText source = Array Int (source,(MatchOffset,MatchLength))

-- | This is the same as the type from JRegex.
data MatchResult a = MR {
    mrBefore :: a,
    mrMatch  :: a,
    mrAfter  :: a,
    mrSubList :: [a],
    mrSubs   :: Array Int a
}

----------------
-- | Rather than carry them around spearately, the options for how to
-- execute a regex are kept as part of the regex.  There are two types
-- of options.  Those that can only be specified at compilation time
-- and never changed are CompOpt.  Those that can be changed later and
-- affect how matching is performed are ExecOpt.  The actually types
-- for these depend on the backend.
class RegexOptions regex compOpt execOpt
  | regex->compOpt execOpt, compOpt->regex execOpt, execOpt->regex compOpt where
  blankCompOpt :: compOpt    -- ^ no options set at all in the backend
  blankExecOpt :: execOpt    -- ^ no options set at all in the backend
  defaultCompOpt :: compOpt  -- ^ reasonable options (extended,caseSensitive,multiline regex)
  defaultExecOpt :: execOpt  -- ^ reasonable options (extended,caseSensitive,multiline regex)
  setExecOpts :: execOpt -> regex -> regex
  -- ^ forget old flags and use new ones
  getExecOpts :: regex -> execOpt
  -- ^ retrieve the current flags

----------------
-- | RegexMaker captures the creation of the compiled regular
-- expression from a source type and an option type.  'makeRegexM' and
-- 'makeRegexM' report parse error using 'MonadError', usually (Either
-- String regex).
-- 
-- The 'makeRegex' function has a default implementation that depends
-- on makeRegexOpts and used 'defaultCompOpt' and 'defaultExecOpt'.
-- Similarly for 'makeRegexM' and 'makeRegexOptsM'.
--
-- There are also default implementaions for 'makeRegexOpts' and
-- 'makeRegexOptsM' in terms of each other.  So a minimal instance
-- definition needs to only define one of these, hopefully
-- 'makeRegexOptsM'.
class (RegexOptions regex compOpt execOpt) => RegexMaker regex compOpt execOpt source
  | regex -> compOpt execOpt, compOpt -> regex execOpt, execOpt -> regex compOpt where
  -- | make using the defaultCompOpt and defaultExecOpt
  makeRegex :: source -> regex
  -- | Specify your own options
  makeRegexOpts :: compOpt -> execOpt -> source -> regex
  -- | make using the defaultCompOpt and defaultExecOpt, reporting errors with fail
  makeRegexM :: (Monad m) => source -> m regex
  -- | Specify your own options, reporting errors with fail
  makeRegexOptsM :: (Monad m) => compOpt -> execOpt -> source -> m regex

  makeRegex = makeRegexOpts defaultCompOpt defaultExecOpt
  makeRegexM = makeRegexOptsM defaultCompOpt defaultExecOpt
  makeRegexOpts c e s = maybe (error "makeRegexOpts failed") id (makeRegexOptsM c e s)
  makeRegexOptsM c e s = return (makeRegexOpts c e s)

----------------
-- | RegexLike is parametrized on a regular expression type and a
-- source type to run the matching on.
--
-- There are default implementations: matchTest and matchOnceText use
-- matchOnce; matchCount and matchAllText use matchAll. matchOnce uses
-- matchOnceText and matchAll uses matchAllText. So a minimal complete
-- instance need to provide at least (matchOnce or matchOnceText) and
-- (matchAll or matchAllText).  Additional definitions are often
-- provided where they will increase efficiency.
--
-- > [ c | let notVowel = makeRegex "[^aeiou]" :: Regex, c <- ['a'..'z'], matchTest notVowel [c]  ]
-- >
-- > "bcdfghjklmnpqrstvwxyz"
--
-- The strictness of these functions is instance dependent.
class (Extract source)=> RegexLike regex source where
  -- | This returns the first match in the source (it checks the whole
  -- source, not just at the start). This returns an array of
  -- (offset,length) index pairs for the match and captured
  -- substrings.  The offset is 0-based.  A (-1) for an offset means a
  -- failure to match.  The lower bound of the array is 0, and the 0th
  -- element is the (offset,length) for the whole match.
  matchOnce  :: regex -> source-> Maybe MatchArray
  -- | matchAll returns a list of matches.  The matches are in order
  -- and do not overlap. If any match succeeds but has 0 length then
  -- this will be the last match in the list.
  matchAll   :: regex -> source-> [MatchArray]
  -- | matchCount returns the number of non-overlapping matches
  -- returned by matchAll.
  matchCount :: regex -> source-> Int
  -- | matchTest return True if there is a match somewhere in the
  -- source (it checks the whole source not just at the start).
  matchTest  :: regex -> source-> Bool
  -- | This is matchAll with the actual subsections of the source
  -- instead of just the (offset,length) information.
  matchAllText  :: regex -> source-> [MatchText source]
  -- | This can return a tuple of three items: the source before the
  -- match, an array of the match and captured substrings (with their
  -- indices), and the source after the match.
  matchOnceText :: regex -> source-> Maybe (source,MatchText source,source)

  matchAll regex source = map (fmap snd) (matchAllText regex source)
  matchOnce regex source = fmap (\(_,mt,_) -> fmap snd mt) (matchOnceText regex source)
  matchTest regex source = isJust (matchOnce regex source)
  matchCount regex source = length (matchAll regex source)
  matchOnceText regex source =
    fmap (\ma -> let (o,l) = ma!0
                 in (before o source
                    ,fmap (\ol -> (extract ol source,ol)) ma
                    ,after (o+l) source))
         (matchOnce regex source)
  matchAllText regex source =
    map (fmap (\ol -> (extract ol source,ol)))
        (matchAll regex source)

----------------
-- | RegexContext is the polymorphic interface to do matching.  Since
-- 'target' is polymorphic you may need to suply the type explicitly
-- in contexts where it cannot be inferred.
--
-- The monadic 'matchM' version uses 'fail' to report when the 'regex'
-- has no match in 'source'.  Two examples:
--
-- Here the contest 'Bool' is inferred:
--
-- > [ c | let notVowel = makeRegex "[^aeiou]" :: Regex, c <- ['a'..'z'], match notVowel [c]  ]
-- >
-- > "bcdfghjklmnpqrstvwxyz"
--
-- Here the context '[String]' must be supplied:
--
-- > let notVowel = (makeRegex "[^aeiou]" :: Regex )
-- > in do { c <- ['a'..'z'] ; matchM notVowel [c] } :: [String]
-- >
-- > ["b","c","d","f","g","h","j","k","l","m","n","p","q","r","s","t","v","w","x","y","z"]
class (RegexLike regex source) => RegexContext regex source target where
  match :: regex -> source -> target
  matchM :: (Monad m) => regex -> source -> m target

----------------
-- | Extract allows for indexing operations on String or ByteString.
class Extract source where
  -- | before is a renamed "take"
  before :: Int -> source -> source
  -- | after is a renamed "drop"
  after :: Int -> source -> source
  -- | For when there is no match, this can construct an empty data value
  empty :: source
  -- | extract takes an offset and length and has a default
  -- implementation of @extract (off,len) source = before len (after
  -- off source)@
  extract :: (Int,Int) -> source -> source
  extract (off,len) source = before len (after off source)

instance Extract String where
  before =  take; after = drop; empty = []

instance Extract B.ByteString where
  before = B.take; after = B.drop; empty = B.empty

instance Extract L.ByteString where
  before = L.take . toEnum; after = L.drop . toEnum; empty = L.empty

instance Extract (S.Seq a) where
  before = S.take; after = S.drop; empty = S.empty

-- | Used in results of RegexContext instances
newtype AllSubmatches f b = AllSubmatches {getAllSubmatches :: (f b)}
-- | Used in results of RegexContext instances
newtype AllTextSubmatches f b = AllTextSubmatches {getAllTextSubmatches :: (f b)}
-- | Used in results of RegexContext instances
newtype AllMatches f b = AllMatches {getAllMatches :: (f b)}
-- | Used in results of RegexContext instances
newtype AllTextMatches f b = AllTextMatches {getAllTextMatches :: (f b) }