{-# LANGUAGE CPP                 #-}
{-# LANGUAGE DataKinds           #-}
{-# LANGUAGE GADTs               #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Trustworthy         #-}
-- | Context free grammars, where
-- each production is a regular-expression.
module RERE.CFG (
    -- * Context-free grammars
    CFG,
    CFGBase,
    -- * Conversion to recursive regular expressions
    cfgToRE,
    ) where

import Data.Fin      (Fin (..))
import Data.Nat      (Nat (..))
import Data.Vec.Lazy (Vec (..))

import qualified Data.Type.Nat as N
import qualified Data.Vec.Lazy as V

import RERE.Type
import RERE.Var

#if !MIN_VERSION_base(4,8,0)
import Data.Traversable (Traversable (..))
#endif

-- $setup
-- >>> :set -XOverloadedStrings

-- | Context-free grammar represented as @n@ equations
-- of 'RE' ('CFGBase') with @n@ variables.
--
type CFG n a = Vec n (CFGBase n a)

-- | Single equation in context-free-grammar equation.
type CFGBase n a = RE (Either (Fin n) a)

-- | Convert 'CFG' (with names for productions) into 'RE'.
-- Note: the start symbol have to be last equation.
--
-- >>> let a = Eps \/ ch_ 'a' <> Var (Left FZ)
-- >>> let b = Eps \/ ch_ 'b' <> Var (Left (FS FZ))
-- >>> let cfg = b ::: a ::: VNil
--
-- \[
-- \begin{aligned}
-- {\mathit{b}} &= {\varepsilon}\cup\mathtt{b}{\mathit{a}} \\
-- {\mathit{a}} &= {\varepsilon}\cup\mathtt{a}{\mathit{b}} \\
-- \end{aligned}
-- \]
--
-- >>> cfgToRE ("b" ::: "a" ::: VNil) cfg
-- Fix "a" (Let "b" (Alt Eps (App (Ch "b") (Var B))) (Alt Eps (App (Ch "a") (Var B))))
--
-- which represents \(\mathbf{fix}\,{\mathit{a}}=\mathbf{let}\,{\mathit{b}}={\varepsilon}\cup\mathtt{b}{\mathit{a}}\,\mathbf{in}\,{\varepsilon}\cup\mathtt{a}{\mathit{b}}\)
-- recursive regular expression.
--
cfgToRE :: (N.SNatI n, Ord a) => Vec ('S n) Name -> CFG ('S n) a -> RE a
cfgToRE = getCfgToRE (N.induction1 start step) where
    start = CfgToRE baseCase

    step :: Ord a => CfgToRE m a -> CfgToRE ('S m) a
    step (CfgToRE rec) = CfgToRE $ \names cfg ->
        rec (V.tail names) (consCase names cfg)

newtype CfgToRE n a = CfgToRE { getCfgToRE :: Vec ('S n) Name -> CFG ('S n) a -> RE a }

baseCase :: Ord a => Vec N.Nat1 Name -> CFG N.Nat1 a -> RE a
baseCase (name ::: VNil) (cfg ::: VNil) =
    fix' name (fmap (either (\FZ -> B) F) cfg)
#if __GLASGOW_HASKELL__  <711
baseCase _ _ = error "silly GHC"
#endif

consCase
    :: forall a n. Ord a
    => Vec ('S n) Name
    -> CFG ('S n) a
    -> CFG n a
consCase (name ::: _names) (f ::: gs) =
    V.map (\g -> let' name f' (fmap sub g)) gs
  where
    f' = fix' name (fmap sub' f)

    sub :: Either (Fin ('S n)) a -> Var (Either (Fin n) a)
    sub (Right a)     = F (Right a)
    sub (Left (FS n)) = F (Left n)
    sub (Left FZ)     = B

    sub' :: Either (Fin ('S n)) a -> Var (Either (Fin n) a)
    sub' (Right a)     = F (Right a)
    sub' (Left (FS n)) = F (Left n)
    sub' (Left FZ)     = B

-------------------------------------------------------------------------------
-- Dummier fix and let
-------------------------------------------------------------------------------

-- This functions only rearrange fix and let,
-- and don't perform other simplifications.

fix' :: Eq a => Name -> RE (Var a) -> RE a
-- fix' n (Let m r s)
--     | Just r' <- traverse (unvar Nothing Just) r
--     = Let m r' (fix' n (fmap swapVar s))
fix' n r
    | Just r' <- floatOut r (unvar Nothing Just) (fix' n)
    = r'
    | Just r' <- traverse (unvar Nothing Just) r
    = r'
fix' n r = Fix n r

floatOut
    :: (Eq a, Eq b)
    => RE (Var a)                        -- ^ expression
    -> (Var a -> Maybe b)                -- ^ float out var
    -> (RE (Var (Var a)) -> RE (Var b))  -- ^ binder
    -> Maybe (RE b)                      -- ^ maybe an expression with let floaten out
floatOut (Let m r s) un mk
    | Just r' <- traverse un r
    = Just
    $ let' m r' $ mk $ fmap swapVar s
    | otherwise
    = floatOut
        s
        (unvar Nothing un)
        (mk . let' m (fmap (fmap F) r) . fmap (fmap swapVar))
floatOut _ _ _ = Nothing

let' :: Eq a => Name -> RE a -> RE (Var a) -> RE a
let' n (Let m x r) s
    = let' m x
    $ let' n r (fmap (unvar B (F . F)) s)
let' n r s = postlet' n r (go B (fmap F r) s) where
    -- This simple CSE only looks for lets. i.e
    --
    --     let x = a; y = a in ...body x y...
    --     -- >
    --     let x = a in ...body x x...
    --
    -- 'consCase' introduces same lets, so this fires a lot.
    --
    -- Note: not using let' or fix' in the bodies
    -- makes this faster.
    go :: Eq b => b -> RE b -> RE b -> RE b
    go v x (Let m a b)
        | x == a    = go v x (fmap (unvar v id) b)
        | otherwise = Let m (go v x a) (go (F v) (fmap F x) b)
    go v x (Fix m a) = Fix m (go (F v) (fmap F x) a)

    go _ _ r' = r'

postlet' :: Name -> RE a -> RE (Var a) -> RE a
postlet' _ r (Var B)                       = r
postlet' _ _ s       | Just s' <- unused s = s'
postlet' n r s                             = Let n r s

unused :: RE (Var a) -> Maybe (RE a)
unused = traverse (unvar Nothing Just)