{-# LANGUAGE BangPatterns           #-}
{-# LANGUAGE FlexibleInstances      #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs                  #-}
{-# LANGUAGE Safe                   #-}
{-# LANGUAGE ScopedTypeVariables    #-}
module Kleene.ERE (
    ERE (..),
    -- * Construction
    --
    -- | Binary operators are
    --
    -- * '<>' for append
    -- * '\/' for union
    -- * '/\' for intersection
    --
    empty,
    eps,
    char,
    charRange,
    anyChar,
    appends,
    unions,
    intersections,
    star,
    string,
    complement,
    -- * Derivative
    nullable,
    derivate,
    -- * Transition map
    transitionMap,
    leadingChars,
    -- * Equivalence
    equivalent,
    -- * Other
    isEmpty,
    isEverything,
    ) where

import Prelude ()
import Prelude.Compat

import Algebra.Lattice
       (BoundedJoinSemiLattice (..), BoundedLattice,
       BoundedMeetSemiLattice (..), JoinSemiLattice (..), Lattice,
       MeetSemiLattice (..))
import Control.Applicative (liftA2)
import Data.Foldable       (toList)
import Data.List           (foldl')
import Data.Map            (Map)
import Data.RangeSet.Map   (RSet)
import Data.Set            (Set)
import Data.String         (IsString (..))

import qualified Data.Function.Step.Discrete.Closed as SF
import qualified Data.Map                           as Map
import qualified Data.RangeSet.Map                  as RSet
import qualified Data.Set                           as Set
import qualified Test.QuickCheck                    as QC

import qualified Kleene.Classes            as C
import qualified Kleene.Internal.Partition as P
import           Kleene.Internal.Pretty

-- | Extended regular expression
--
-- It's both, /Kleene/ and /Boolean/ algebra. (If we add only intersections, it
-- wouldn't be /Boolean/).
--
-- /Note:/ we don't have special constructor for intersections.
-- We use de Morgan formula \(a \land b = \neg (\neg a \lor \neg b)\).
--
-- >>> putPretty $ asEREChar $ "a" /\ "b"
-- ^~(~a|~b)$
--
-- There is no generator, as 'intersections' makes it hard.
--
data ERE c
    = EREChars (RSet c)                -- ^ Single character
    | EREAppend [ERE c]                -- ^ Concatenation
    | EREUnion (RSet c) (Set (ERE c))  -- ^ Union
    | EREStar (ERE c)                  -- ^ Kleene star
    | ERENot (ERE c)                   -- ^ Complement
  deriving (Eq, Ord, Show)

-------------------------------------------------------------------------------
-- Smart constructor
-------------------------------------------------------------------------------

-- | Empty regex. Doesn't accept anything.
--
-- >>> putPretty (empty :: ERE Char)
-- ^[]$
--
-- >>> putPretty (bottom :: ERE Char)
-- ^[]$
--
-- prop> match (empty :: ERE Char) (s :: String) === False
--
empty :: ERE c
empty = EREChars RSet.empty

-- | Everything.
--
-- >>> putPretty (everything :: ERE Char)
-- ^~[]$
--
-- >>> putPretty (top :: ERE Char)
-- ^~[]$
--
-- prop> match (everything :: ERE Char) (s :: String) === True
--
everything :: ERE c
everything = complement empty

-- | Empty string. /Note:/ different than 'empty'.
--
-- >>> putPretty eps
-- ^$
--
-- >>> putPretty (mempty :: ERE Char)
-- ^$
--
-- prop> match (eps :: ERE Char) s === null (s :: String)
--
eps :: ERE c
eps = EREAppend []

-- |
--
-- >>> putPretty (char 'x')
-- ^x$
--
char :: c -> ERE c
char = EREChars . RSet.singleton

-- |
--
-- >>> putPretty $ charRange 'a' 'z'
-- ^[a-z]$
--
charRange :: Ord c => c -> c -> ERE c
charRange c c' = EREChars $ RSet.singletonRange (c, c')

-- | Any character. /Note:/ different than dot!
--
-- >>> putPretty anyChar
-- ^[^]$
--
anyChar :: Bounded c => ERE c
anyChar = EREChars RSet.full

-- | Concatenate regular expressions.
--
-- prop> asEREChar r <> empty === empty
-- prop> empty <> asEREChar r === empty
-- prop> (asEREChar r <> s) <> t === r <> (s <> t)
--
-- prop> asEREChar r <> eps === r
-- prop> eps <> asEREChar r === r
--
appends :: Eq c => [ERE c] -> ERE c
appends rs0
    | elem empty rs1 = empty
    | otherwise = case rs1 of
        [r] -> r
        rs  -> EREAppend rs
  where
    -- flatten one level of EREAppend
    rs1 = concatMap f rs0

    f (EREAppend rs) = rs
    f r             = [r]

-- | Union of regular expressions.
--
-- prop> asEREChar r \/ r === r
-- prop> asEREChar r \/ s === s \/ r
-- prop> (asEREChar r \/ s) \/ t === r \/ (s \/ t)
--
-- prop> empty \/ asEREChar r === r
-- prop> asEREChar r \/ empty === r
--
-- prop> everything \/ asREChar r === everything
-- prop> asREChar r \/ everything === everything
--
unions :: (Ord c, Enum c) => [ERE c] -> ERE c
unions = uncurry mk . foldMap f where
    mk cs rss
        | Set.null rss = EREChars cs
        | Set.member everything rss = everything
        | RSet.null cs = case Set.toList rss of
            []  -> empty
            [r] -> r
            _   -> EREUnion cs rss
        | otherwise    = EREUnion cs rss

    f (EREUnion cs rs) = (cs, rs)
    f (EREChars cs)    = (cs, Set.empty)
    f r                = (mempty, Set.singleton r)

-- | Intersection of regular expressions.
--
-- prop> asEREChar r /\ r === r
-- prop> asEREChar r /\ s === s /\ r
-- prop> (asEREChar r /\ s) /\ t === r /\ (s /\ t)
--
-- prop> empty /\ asEREChar r === empty
-- prop> asEREChar r /\ empty === empty
--
-- prop> everything /\ asREChar r === r
-- prop> asREChar r /\ everything === r
--
intersections :: (Ord c, Enum c) => [ERE c] -> ERE c
intersections = complement . unions . map complement

-- | Complement.
--
-- prop> complement (complement r) === asEREChar r
--
complement :: ERE c -> ERE c
complement r = case r of
    ERENot r' -> r'
    _ -> ERENot r

-- | Kleene star.
--
-- prop> star (star r) === star (asEREChar r)
--
-- prop> star eps     === asEREChar eps
-- prop> star empty   === asEREChar eps
-- prop> star anyChar === asEREChar everything
--
-- prop> star (asREChar r \/ eps) === star r
-- prop> star (char c \/ eps) === star (char (c :: Char))
-- prop> star (empty \/ eps) === eps
--
star :: (Ord c, Bounded c) => ERE c -> ERE c
star r = case r of
    EREStar _                          -> r
    EREAppend []                       -> eps
    EREChars cs | RSet.null cs         -> eps
    EREChars cs | RSet.isFull cs       -> everything
    EREUnion cs rs | Set.member eps rs -> case Set.toList rs' of
        []                  -> star (EREChars cs)
        [r'] | RSet.null cs -> star r'
        _                   -> EREStar (EREUnion cs rs')
      where
        rs' = Set.delete eps rs
    _                                  -> EREStar r

-- | Literal string.
--
-- >>> putPretty ("foobar" :: ERE Char)
-- ^foobar$
--
-- >>> putPretty ("(.)" :: ERE Char)
-- ^\(\.\)$
--
string :: [c] -> ERE c
string []  = eps
string [c] = EREChars (RSet.singleton c)
string cs  = EREAppend $ map (EREChars . RSet.singleton) cs

instance (Ord c, Enum c, Bounded c) => C.Kleene c (ERE c) where
    empty      = empty
    eps        = eps
    char       = char
    appends    = appends
    unions     = unions
    star       = star

instance (Ord c, Enum c, Bounded c) => C.FiniteKleene c (ERE c) where
    everything = everything
    charRange  = charRange
    fromRSet   = EREChars
    anyChar    = anyChar

instance C.Complement c (ERE c) where
    complement = complement

-------------------------------------------------------------------------------
-- derivative
-------------------------------------------------------------------------------

-- | We say that a regular expression r is nullable if the language it defines
-- contains the empty string.
--
-- >>> nullable eps
-- True
--
-- >>> nullable (star "x")
-- True
--
-- >>> nullable "foo"
-- False
--
-- >>> nullable (complement eps)
-- False
--
nullable :: ERE c -> Bool
nullable (EREChars _)      = False
nullable (EREAppend rs)    = all nullable rs
nullable (EREUnion _cs rs) = any nullable rs
nullable (EREStar _)       = True
nullable (ERENot r)        = not (nullable r)

-- | Intuitively, the derivative of a language \(\mathcal{L} \subset \Sigma^\star\)
-- with respect to a symbol \(a \in \Sigma\) is the language that includes only
-- those suffixes of strings with a leading symbol \(a\) in \(\mathcal{L}\).
--
-- >>> putPretty $ derivate 'f' "foobar"
-- ^oobar$
--
-- >>> putPretty $ derivate 'x' $ "xyz" \/ "abc"
-- ^yz$
--
-- >>> putPretty $ derivate 'x' $ star "xyz"
-- ^yz(xyz)*$
--
derivate :: (Ord c, Enum c) => c -> ERE c -> ERE c
derivate c (EREChars cs)     = derivateChars c cs
derivate c (EREUnion cs rs)  = unions $ derivateChars c cs : [ derivate c r | r <- toList rs]
derivate c (EREAppend rs)    = derivateAppend c rs
derivate c rs@(EREStar r)    = derivate c r <> rs
derivate c (ERENot r)        = complement (derivate c r)

instance (Ord c, Enum c) => C.Derivate c (ERE c) where
    nullable = nullable
    derivate = derivate

instance (Ord c, Enum c) => C.Match c (ERE c) where
    match r = nullable . foldl' (flip derivate) r

derivateAppend :: (Enum c, Ord c) => c -> [ERE c] -> ERE c
derivateAppend _ []      = empty
derivateAppend c [r]     = derivate c r
derivateAppend c (r:rs)
    | nullable r         = unions [r' <> appends rs, rs']
    | otherwise          = r' <> appends rs
  where
    r'  = derivate c r
    rs' = derivateAppend c rs

derivateChars :: Ord c =>  c -> RSet c -> ERE c
derivateChars c cs
    | c `RSet.member` cs      = eps
    | otherwise               = empty

-------------------------------------------------------------------------------
-- isEmpty
-------------------------------------------------------------------------------

-- | Whether 'ERE' is (structurally) equal to 'empty'.
isEmpty :: ERE c -> Bool
isEmpty (EREChars rs) = RSet.null rs
isEmpty _            = False

-- | Whether 'ERE' is (structurally) equal to 'everything'.
isEverything :: ERE c -> Bool
isEverything (ERENot (EREChars rs)) = RSet.null rs
isEverything _                      = False

-------------------------------------------------------------------------------
-- States
-------------------------------------------------------------------------------

-- | Transition map. Used to construct 'Kleene.DFA.DFA'.
--
-- >>> void $ Map.traverseWithKey (\k v -> putStrLn $ pretty k ++ " : " ++ SF.showSF (fmap pretty v)) $ transitionMap ("ab" :: ERE Char)
-- ^[]$ : \_ -> "^[]$"
-- ^b$ : \x -> if
--     | x <= 'a'  -> "^[]$"
--     | x <= 'b'  -> "^$"
--     | otherwise -> "^[]$"
-- ^$ : \_ -> "^[]$"
-- ^ab$ : \x -> if
--     | x <= '`'  -> "^[]$"
--     | x <= 'a'  -> "^b$"
--     | otherwise -> "^[]$"
--
transitionMap
    :: forall c. (Ord c, Enum c, Bounded c)
    => ERE c
    -> Map (ERE c) (SF.SF c (ERE c))
transitionMap re = go Map.empty [re] where
    go :: Map (ERE c) (SF.SF c (ERE c))
       -> [ERE c]
       -> Map (ERE c) (SF.SF c (ERE c))
    go !acc [] = acc
    go acc (r : rs)
        | r `Map.member` acc = go acc rs
        | otherwise = go (Map.insert r pm acc) (SF.values pm ++ rs)
      where
        pm = P.toSF (\c -> derivate c r) (leadingChars r)

instance (Ord c, Enum c, Bounded c) => C.TransitionMap c (ERE c) where
    transitionMap = transitionMap

-- | Leading character sets of regular expression.
--
-- >>> leadingChars "foo"
-- fromSeparators "ef"
--
-- >>> leadingChars (star "b" <> star "e")
-- fromSeparators "abde"
--
-- >>> leadingChars (charRange 'b' 'z')
-- fromSeparators "az"
--
leadingChars :: (Ord c, Enum c, Bounded c) => ERE c -> P.Partition c
leadingChars (EREChars cs)    = P.fromRSet cs
leadingChars (EREUnion cs rs) = P.fromRSet cs <> foldMap leadingChars rs
leadingChars (EREStar r)      = leadingChars r
leadingChars (EREAppend rs)   = leadingCharsAppend rs
leadingChars (ERENot r)       = leadingChars r

leadingCharsAppend :: (Ord c, Enum c, Bounded c) => [ERE c] -> P.Partition c
leadingCharsAppend [] = P.whole
leadingCharsAppend (r : rs)
    | nullable r = leadingChars r <> leadingCharsAppend rs
    | otherwise  = leadingChars r

-------------------------------------------------------------------------------
-- Equivalence
-------------------------------------------------------------------------------

-- | Whether two regexps are equivalent.
--
-- @
-- 'equivalent' re1 re2 <=> forall s. 'match' re1 s == 'match' re1 s
-- @
--
-- >>> let re1 = star "a" <> "a"
-- >>> let re2 = "a" <> star "a"
--
-- These are different regular expressions, even we perform
-- some normalisation-on-construction:
--
-- >>> re1 == re2
-- False
--
-- They are however equivalent:
--
-- >>> equivalent re1 re2
-- True
--
-- The algorithm works by executing 'states' on "product" regexp,
-- and checking whether all resulting states are both accepting or rejecting.
--
-- @
-- re1 == re2 ==> 'equivalent' re1 re2
-- @
--
-- === More examples
--
-- >>> let example re1 re2 = putPretty re1 >> putPretty re2 >> return (equivalent re1 re2)
-- >>> example re1 re2
-- ^a*a$
-- ^aa*$
-- True
--
-- >>> example (star "aa") (star "aaa")
-- ^(aa)*$
-- ^(aaa)*$
-- False
--
-- >>> example (star "aa" <> star "aaa") (star "aaa" <> star "aa")
-- ^(aa)*(aaa)*$
-- ^(aaa)*(aa)*$
-- True
--
-- >>> example (star ("a" \/ "b")) (star $ star "a" <> star "b")
-- ^[a-b]*$
-- ^(a*b*)*$
-- True
--
equivalent :: forall c. (Ord c, Enum c, Bounded c) => ERE c -> ERE c -> Bool
equivalent x0 y0 = go mempty [(x0, y0)] where
    go :: Set (ERE c, ERE c) -> [(ERE c, ERE c)] -> Bool
    go !_ [] = True
    go acc (p@(x, y) : zs)
        | p `Set.member` acc = go acc zs
        -- if two regexps are structurally the same, we don't need to recurse.
        | x == y             = go (Set.insert p acc) zs
        | all agree ps       = go (Set.insert p acc) (ps ++ zs)
        | otherwise = False
      where
        cs = toList $ P.examples $ leadingChars x `P.wedge` leadingChars y
        ps = map (\c -> (derivate c x, derivate c y)) cs

    agree :: (ERE c, ERE c) -> Bool
    agree (x, y) = nullable x == nullable y

instance (Ord c, Enum c, Bounded c) => C.Equivalent c (ERE c) where
    equivalent = equivalent

-------------------------------------------------------------------------------
-- Instances
-------------------------------------------------------------------------------

instance Eq c => Semigroup (ERE c) where
    r <> r' = appends [r, r']

instance Eq c => Monoid (ERE c) where
    mempty  = eps
    mappend = (<>)
    mconcat = appends

instance (Ord c, Enum c) => JoinSemiLattice (ERE c) where
    r \/ r' = unions [r, r']

instance (Ord c, Enum c) => BoundedJoinSemiLattice (ERE c) where
    bottom = empty

instance (Ord c, Enum c) => MeetSemiLattice (ERE c) where
    r /\ r' = intersections [r, r']

instance (Ord c, Enum c) => BoundedMeetSemiLattice (ERE c) where
    top = everything

instance  (Ord c, Enum c) => Lattice (ERE c)
instance  (Ord c, Enum c) => BoundedLattice (ERE c)

instance c ~ Char => IsString (ERE c) where
    fromString = string

instance (Ord c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (ERE c) where
    arbitrary = QC.sized arb where
        c :: QC.Gen (ERE c)
        c = EREChars . RSet.fromRangeList <$> QC.arbitrary

        arb :: Int -> QC.Gen (ERE c)
        arb n | n <= 0    = QC.oneof [c, fmap char QC.arbitrary, pure eps]
              | otherwise = QC.oneof
            [ c
            , pure eps
            , fmap char QC.arbitrary
            , liftA2 (<>) (arb n2) (arb n2)
            , liftA2 (\/) (arb n2) (arb n2)
            , fmap star (arb n2)
            , fmap complement (arb n2)
            ]
          where
            n2 = n `div` 2

instance (QC.CoArbitrary c) => QC.CoArbitrary (ERE c) where
    coarbitrary (EREChars cs)    = QC.variant (0 :: Int) . QC.coarbitrary (RSet.toRangeList cs)
    coarbitrary (EREAppend rs)   = QC.variant (1 :: Int) . QC.coarbitrary rs
    coarbitrary (EREUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (RSet.toRangeList cs, Set.toList rs)
    coarbitrary (EREStar r)      = QC.variant (3 :: Int) . QC.coarbitrary r
    coarbitrary (ERENot r)       = QC.variant (4 :: Int) . QC.coarbitrary r

-------------------------------------------------------------------------------
-- JavaScript
-------------------------------------------------------------------------------

instance c ~ Char => Pretty (ERE c) where
    prettyS x = showChar '^' . go False x . showChar '$'
      where
        go :: Bool -> ERE Char -> ShowS
        go p (EREStar a)
            = parens p
            $ go True a . showChar '*'
        go p (EREAppend rs)
            = parens p $ goMany id rs
        go p (EREUnion cs rs)
            | RSet.null cs = goUnion p rs
            | Set.null rs  = prettyS cs
            | otherwise    = goUnion p (Set.insert (EREChars cs) rs)
        go _ (EREChars cs)
            = prettyS cs
        go p (ERENot r)
            = parens p $ showChar '~' . go True r

        goUnion p rs
            | Set.member eps rs = parens p $ goUnion' True . showChar '?'
            | otherwise         = goUnion' p
          where
            goUnion' p' = case Set.toList (Set.delete eps rs) of
                [] -> go True empty
                [r] -> go p' r
                (r:rs') -> parens True $ goSome1 (showChar '|') r rs'

        goMany :: ShowS -> [ERE Char] -> ShowS
        goMany sep = foldr (\a b -> go False a . sep . b) id

        goSome1 :: ShowS -> ERE Char -> [ERE Char] -> ShowS
        goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r)

        parens :: Bool -> ShowS -> ShowS
        parens True  s = showString "(" . s . showChar ')'
        parens False s = s

-------------------------------------------------------------------------------
-- Doctest
-------------------------------------------------------------------------------

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Control.Monad (void)
-- >>> import Data.Foldable (traverse_)
-- >>> import Data.List (sort)
--
-- >>> import Test.QuickCheck ((===))
-- >>> import qualified Test.QuickCheck as QC
--
-- >>> import Kleene.Classes (match)
-- >>> let asEREChar :: ERE Char -> ERE Char; asEREChar = id