{-
	Copyright (C) 2010-2015 Dr. Alistair Ward

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
-}
{- |
 [@AUTHOR@]	Dr. Alistair Ward

 [@DESCRIPTION@]	Miscellaneous polymorphic list-operations.
-}

module ToolShed.Data.List(
-- * Types
-- ** Type-synonyms
	ChunkLength,
	Matches,
-- * Functions
	chunk,
	excise,
	equalityBy,
	findConvergence,
	findConvergenceBy,
	interleave,
	linearise,
	measureJaroDistance,
	merge,
	mergeBy,
	nub',
--	nubWithInt,
	permutations,
	permutationsBy,
	takeUntil,
	showListWith,
--	showListWithChar,
--	showListWithString
) where

import qualified	Control.Arrow
import			Control.Arrow((&&&),(***))
import qualified	Data.IntSet
import qualified	Data.List
import qualified	Data.Set
import qualified	Data.Tuple

-- | The length of the chunks into which a list is split.
type ChunkLength	= Int

{- |
	* Splits a list into chunks of the specified length.

	* The last chunk will be shorter, if the chunk-length isn't an aliquot part of the input list-length.

	* If the chunk-length is zero, the resulting list will be an infinite sequence of null lists.

	* CAVEAT: a similar function is available in the module /Data.List.Split/, though this one checks for @(chunkLength < 0)@.
-}
chunk
	:: ChunkLength
	-> [a]	-- ^ The polymorphic input list to be chunked.
	-> [[a]]
chunk size list
	| size < 0	= error $ "ToolShed.Data.List.chunk:\tnegative chunk-size=" ++ show size
	| otherwise	= chunk' list
	where
		chunk' :: [a] -> [[a]]
		chunk' []	= []
		chunk' a	= uncurry (:) . Control.Arrow.second chunk' $ splitAt size a

-- | Remove the single indexed element from the list.
excise
	:: Int	-- ^ The index.
	-> [a]	-- ^ The polymorphic input list.
	-> [a]	-- ^ The same list, with the indexed element removed.
excise 0	= tail	-- Just for efficiency.
excise i	= uncurry (++) . Control.Arrow.second tail . splitAt i

-- | The type of function required by 'findConvergenceBy', 'permutationsBy'.
type Matches a	= a -> a -> Bool

-- | A convenient way to compose the 'Matches'-function required by 'findConvergenceBy' & 'permutationsBy'.
equalityBy :: Eq b => (a -> b) -> Matches a
equalityBy f x y	= f x == f y

-- | Take the first element from the (potentially infinite) list, which matches the subsequent element, according to the specified function.
findConvergenceBy :: Matches a -> [a] -> a
findConvergenceBy _ []	=  error "ToolShed.Data.List.findConvergenceBy:\ta null list is too short for convergence to exist"
findConvergenceBy _ [_]	=  error "ToolShed.Data.List.findConvergenceBy:\ta singleton list is too short for convergence to exist"
findConvergenceBy matches l
	| null l'	= error "ToolShed.Data.List.findConvergenceBy:\tno convergence found"
	| otherwise	= fst $ head l'
	where
		l'	= dropWhile (not . uncurry matches) . uncurry zip $ (init &&& tail) l

-- | A specific instance of 'findConvergenceBy'.
findConvergence :: Eq a => [a] -> a
findConvergence	= findConvergenceBy (==)

{- |
	* The list of all permutations, generated by selecting any one datum from each sub-list in turn, from the specified list of lists.

	* As each item is selected, the remaining lists are filtered according to the specified 'Matches'-function.

	* Thus '/=' could be used to select a different item from each list.
-}
permutationsBy :: Matches a -> [[a]] -> [[a]]
permutationsBy matches lists
	| any null lists	= []	-- Required for efficiency, to catch the case [bigList1, bigList2 ... null]
	| otherwise		= slave lists
	where
		slave (xs : xss)	= [x : xs' | x <- xs, xs' <- slave $ map (filter $ matches x) xss]
		slave []		= [[]]

{- |
	* The list of all permutations, generated by selecting any one datum from each sub-list in turn, from the specified list of lists.

	* A specific instance of 'permutationsBy', in which no filtering of subsequent lists is performed after each item is selected.

	* N.B.: differs from 'Data.List.permutations', which selects items from a single input list.
-}
permutations :: [[a]] -> [[a]]
permutations	= permutationsBy (\_ _ -> True)

{-# NOINLINE nub' #-}
{-# RULES "nub'/Int" nub' = nubWithInt #-}

{- |
	* A strict version of 'Data.List.nub' with better time-complexity.

	* CAVEAT: the specified list must be finite, since the entire set is constructed before streaming to a list.

	* CAVEAT: it sorts the output as a side-effect, & consequently it requires a type which implements 'Ord'.
-}
nub' :: Ord a => [a] -> [a]
nub'	= Data.Set.toList . Data.Set.fromList

-- | A specialisation for type 'Int'.
nubWithInt :: [Int] -> [Int]
nubWithInt	= Data.IntSet.toList . Data.IntSet.fromList

-- | Converts a list of /Pairs/, into a narrower list.
linearise :: [(a, a)] -> [a]
linearise []			= []
linearise ((l, r) : remainder)	= l : r : linearise remainder	-- Recurse.

-- | Interleaves the specified lists, taking the first item from the first list.
interleave :: [a] -> [a] -> [a]
interleave (x : xs) ys	= x : interleave ys xs
interleave _ ys		= ys

{- |
	* Merge two sorted lists, according to the specified order, to product a single sorted list.

	* The merge-process is /stable/, in that where items from each list are equal, they remain in the original order.

	* CAVEAT: duplicates are preserved.
-}
mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
mergeBy cmp	= slave	where
	slave l@(x : xs) r@(y : ys)
		| x `cmp` y == GT	= y : slave l ys
		| otherwise		= x : slave xs r
	slave [] r	= r
	slave l []	= l

-- | A specific instance of 'mergeBy'.
merge :: Ord a => [a] -> [a] -> [a]
merge	= mergeBy compare

{- |
	* Take until the specified predicate is satisfied; /including/ the item which satisfied it.

	* N.B.: @takeWhile (not . test)@ would return one fewer item.
-}
takeUntil
	:: (a -> Bool)	-- ^ Predicate, used to determine the last item taken.
	-> [a]		-- ^ The polymorphic input list.
	-> [a]
takeUntil predicate	= takeUntil'	where
	takeUntil' (x : xs)	= x {-take regardless-} : if predicate x then [] else takeUntil' xs
	takeUntil' _		= []

-- | Show a list, delimited by the specified tokens.
showListWith :: (Show token, Show element)
	=> (token, token, token)	-- ^ (Start-delimiter, separator, end-delimiter)
	-> [element]			-- ^ The polymorphic list to show.
	-> ShowS
showListWith (startDelimiter, separator, endDelimiter)	= foldr (.) (shows endDelimiter) . (shows startDelimiter :) . Data.List.intersperse (shows separator) . map shows

{-# NOINLINE showListWith #-}
{-# RULES "showListWith/Char" showListWith = showListWithChar #-}
{-# RULES "showListWith/String" showListWith = showListWithString #-}

-- | A specialisation of 'showListWith'.
showListWithChar :: Show element => (Char, Char, Char) -> [element] -> ShowS
showListWithChar (startDelimiter, separator, endDelimiter)	= foldr (.) (showChar endDelimiter) . (showChar startDelimiter :) . Data.List.intersperse (showChar separator) . map shows

-- | A specialisation of 'showListWith'.
showListWithString :: Show element => (String, String, String) -> [element] -> ShowS
showListWithString (startDelimiter, separator, endDelimiter)	= foldr (.) (showString endDelimiter) . (showString startDelimiter :) . Data.List.intersperse (showString separator) . map shows

{- |
	* Measures the /distance/ between two lists (typically Strings).

	* The operation is /commutative/; it doesn't matter about the order of the arguments.

	* The result ranges from /0/ when they're completely dissimilar, to /1/ when identical.

	* <https://lingpipe-blog.com/2006/12/13/code-spelunking-jaro-winkler-string-comparison>.
-}
measureJaroDistance :: (Eq a, Fractional distance) => ([a], [a]) -> distance
measureJaroDistance pair
	| uncurry (==) pair	= 1
	| nMatches == 0		= 0	-- Guard against divide-by-zero.
	| otherwise		= sum [
		fMatches / fromIntegral l,
		fMatches / fromIntegral l',
		1 - fromIntegral (
			foldr (
				\cc	-> if uncurry (==) cc then id else succ
			) (0 :: Int) $ uncurry zip matchesPair	-- Count transpositions; matches which occur in a different order.
		) / (
			2 {-compensate for double counting-} * fMatches
		)
	] / 3 {-normalise: each component of the above sum is in the closed unit interval-}
	where
		l, l'	:: Int
		(l, l')	= length *** length $ pair

		findMatches :: Eq a => ([a], [a]) -> [a]
		findMatches	= uncurry slave . (zip [0 ..] *** zip [0 ..])	where
			slave :: Eq a => [(Int, a)] -> [(Int, a)] -> [a]
			slave [] _		= []	-- There's nothing to match.
			slave _ []		= []	-- Ignore any remaining 'xs', since there's nothing against which to match.
			slave ((i, x) : xs) ys	= case span (
				uncurry (||) . (
					(> pred (max l l' `div` 2)) . abs . (i -) *** (x /=)
				) -- Reject either mismatches, or matches which occur outside a maximum separation.
			 ) ys of
				(matchFailures, _ : untested)	-> x : slave xs (matchFailures ++ untested)	-- Remove the matched datum, so that it can't be matched against again.
				_				-> slave xs ys					-- Failed to match 'x' in 'ys'.

		matchesPair@(matches, _)	= findMatches &&& findMatches . Data.Tuple.swap $ pair
		nMatches			= length matches
		fMatches			= fromIntegral nMatches	-- Translate to the return-type.