{-# LANGUAGE ViewPatterns, DeriveFunctor, BangPatterns, TupleSections, RecordWildCards #-}

-- | Applying a set of paths vs a set of patterns efficiently
module System.FilePattern.Step(
    step, step_, Step(..), StepNext(..)
    ) where

import System.FilePattern.Core
import System.FilePattern.Tree
import System.FilePattern.Wildcard

import Control.Monad.Extra
import Data.List.Extra
import Data.Semigroup
import Data.Tuple.Extra
import Data.Functor
import Data.Either
import qualified Data.List.NonEmpty as NE
import Prelude


-- | What we know about the next step values.
data StepNext
    =
      -- | All components not listed will result in dull 'Step' values from 'stepApply',
      --   with 'stepNext' being @'StepOnly' []@ and 'stepDone' being @[]@. The field is a set - their order
      --   is irrelevant but there will be no duplicates in values arising from 'step'.
      StepOnly [String]
    | -- | All calls to 'stepApply' will return 'stepNext' being 'StepEverything' with a non-empty 'stepDone'.
      StepEverything
    | -- | We have no additional information about the output from 'stepApply'.
      StepUnknown
      deriving (Eq,Ord,Show)


mergeStepNext :: [StepNext] -> StepNext
mergeStepNext = f id
    where
        f rest [] = StepOnly $ rest []
        f rest (StepUnknown:xs) = if StepEverything `elem` xs then StepEverything else StepUnknown
        f rest (StepEverything:xs) = StepEverything
        f rest (StepOnly x:xs) = f (rest . (x ++)) xs

normaliseStepNext :: StepNext -> StepNext
normaliseStepNext (StepOnly xs) = StepOnly $ nubOrd xs
normaliseStepNext x = x


instance Semigroup StepNext where
    a <> b = sconcat $ NE.fromList [a,b]
    sconcat = normaliseStepNext . mergeStepNext . NE.toList

instance Monoid StepNext where
    mempty = StepOnly []
    mappend = (<>)
    mconcat = maybe mempty sconcat . NE.nonEmpty -- important: use the fast sconcat


-- | The result of 'step', used to process successive path components of a set of 'FilePath's.
data Step a = Step
    {stepDone :: [(a, [String])]
        -- ^ The files that match at this step. Includes the list that would have been produced by 'System.FilePattern.match',
        --   along with the values passed to 'step'. These results are not necessarily in order.
    ,stepNext :: StepNext
        -- ^ Information about the results of calling 'stepApply'. See 'StepNext' for details.
    ,stepApply :: String -> Step a
        -- ^ Apply one component from a 'FilePath' to get a new 'Step'.
    }
    deriving Functor

mergeStep :: (StepNext -> StepNext) -> [Step a] -> Step a
mergeStep f [] = mempty
mergeStep f [x] = x
mergeStep f xs = Step
    {stepDone = concatMap stepDone xs
    ,stepNext = f $ mergeStepNext $ map stepNext xs
    ,stepApply = \x -> mergeStep f $ map (`stepApply` x) xs
    }

instance Semigroup (Step a) where
    a <> b = sconcat $ NE.fromList [a,b]
    sconcat (NE.toList -> ss)
        | [s] <- ss = s
        | otherwise = Step
            {stepDone = concatMap stepDone ss
            ,stepNext = mconcatMap stepNext ss
            ,stepApply = \x -> fastFoldMap (`stepApply` x) ss
            }

instance Monoid (Step a) where
    mempty = Step [] mempty $ const mempty
    mappend = (<>)
    mconcat = maybe mempty sconcat . NE.nonEmpty -- important: use the fast sconcat

fastFoldMap :: Monoid m => (a -> m) -> [a] -> m
{- HLINT ignore fastFoldMap -}
fastFoldMap f = mconcat . map f -- important: use the fast mconcat


-- Invariant: No two adjacent Lits
-- Invariant: No empty Lits
data Pat = Lits [Wildcard String]
         | StarStar
         | End
           deriving (Show,Eq,Ord)

toPat :: Pattern -> [Pat]
toPat (Pattern (Literal xs)) = [Lits xs]
toPat (Pattern (Wildcard pre mid post)) = intercalate [StarStar] $ map lit $ pre : mid ++ [post]
    where lit xs = [Lits xs | xs /= []]


-- | Efficient matching of a set of 'FilePattern's against a set of 'FilePath's.
--   First call 'step' passing in all the 'FilePattern's, with a tag for each one.
--   Next call the methods of 'Step', providing the components of the 'FilePath's in turn.
--
--   Useful for efficient bulk searching, particularly directory scanning, where you can
--   avoid descending into directories which cannot match.
step :: [(a, FilePattern)] -> Step a
step = restore . ($ id) . f [] . makeTree . map (second $ toPat . parsePattern)
    where
        f :: [Pat] -> Tree Pat a -> (Parts -> Step [a])
        f seen (Tree ends nxts) = \parts -> mergeStep id $ map ($ parts) $ sEnds ++ sNxts
            where
                sEnds = case unroll ends (seen ++ [End]) of
                    _ | null ends -> []
                    Just ([], c) -> [c (error "step invariant violated (1)")]
                    _ -> error $ "step invariant violated (2), " ++ show seen

                sNxts = flip map nxts $ \(p,ps) ->
                    let seen2 = seen ++ [p] in
                    case unroll (error "step invariant violated (3)") seen2 of
                        Nothing -> f seen2 ps
                        Just (nxt, c) -> c (f [] $ retree nxt ps)

        retree [] t = t
        retree (p:ps) t = Tree [] [(p, retree ps t)]

        restore :: Step [a] -> Step a -- and restore the stepNext invariant
        restore Step{..} = Step
            {stepDone = [(a, b) | (as,b) <- stepDone, a <- as]
            ,stepNext = normaliseStepNext stepNext
            ,stepApply = restore . stepApply
            }

-- | Like 'step' but using @()@ as the tag for each 'FilePattern'.
step_ :: [FilePattern] -> Step ()
step_ = step . map ((),)


match1 :: Wildcard String -> String -> Maybe [String]
match1 w x = rights <$> wildcardMatch equals w x


type Parts = [String] -> [String]

-- Given a prefix of the pattern, if you can deal with it, return
-- the rest of the pattern in the prefix you didn't match, and something that given
-- a matcher for the rest of the pattern, returns a matcher for the whole pattern.
unroll :: a -> [Pat] -> Maybe ([Pat], (Parts -> Step a) -> Parts -> Step a)
-- normal path, dispatch on what you find next
unroll val [End] = Just ([], \_ parts -> mempty{stepDone = [(val, parts [])]})

-- two stars in a row, the first will match nothing, the second everything
unroll val [StarStar,StarStar] = Just ([StarStar], \cont parts -> cont (parts . ([]:)))

-- if you have literals next, match them
unroll val [Lits (l:ls)] = Just ([Lits ls | ls /= []], \cont parts -> Step
    {stepDone = []
    ,stepNext = case l of Literal v -> StepOnly [v]; Wildcard{} -> StepUnknown
    ,stepApply = \s -> case match1 l s of
        Just xs -> cont (parts . (xs++))
        Nothing -> mempty
    })

-- if anything else is allowed, just quickly allow it
unroll val [StarStar,End] = Just ([], \_ parts -> g parts [])
    where
        g parts rseen = Step
            {stepDone = [(val, parts [mkParts $ reverse rseen])]
            ,stepNext = StepEverything
            ,stepApply = \s -> g parts (s:rseen)
            }

-- if you have a specific tail prefix, find it
unroll val [StarStar,Lits (reverse &&& length -> (rls,nls)),End] = Just ([], \_ parts -> g parts 0 [])
    where
        g parts !nseen rseen = Step
            {stepDone = case zipWithM match1 rls rseen of
                _ | nseen < nls -> [] -- fast path
                Just xss -> [(val, parts $ mkParts (reverse $ drop nls rseen) : concat (reverse xss))]
                Nothing -> []
            ,stepNext = StepUnknown
            ,stepApply = \s -> g parts (nseen+1) (s:rseen)
            }

-- we know the next literal, and it doesn't have any constraints immediately after
unroll val [StarStar,Lits [l],StarStar] = Just ([StarStar], \cont parts -> g cont parts [])
    where
        g cont parts rseen = Step
            {stepDone = []
            ,stepNext = StepUnknown
            ,stepApply = \s -> case match1 l s of
                Just xs -> cont (parts . (++) (mkParts (reverse rseen) : xs))
                Nothing -> g cont parts (s:rseen)
            }

-- the hard case, a floating substring, accumulate at least N, then star testing in reverse
unroll val [StarStar,Lits (reverse &&& length -> (rls,nls)),StarStar] = Just ([StarStar], \cont parts -> g cont parts 0 [])
    where
        g cont parts !nseen rseen = Step
            {stepDone = []
            ,stepNext = StepUnknown
            ,stepApply = \s -> case zipWithM match1 rls (s:rseen) of
                _ | nseen+1 < nls -> g cont parts (nseen+1) (s:rseen) -- not enough accumulated yet
                Nothing -> g cont parts (nseen+1) (s:rseen)
                Just xss -> cont (parts . (++) (mkParts (reverse $ drop nls $ s:rseen) : concat (reverse xss)))
            }

unroll _ _ = Nothing