{- Copyright (C) 2010-2015 Dr. Alistair Ward This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. -} {- | [@AUTHOR@] Dr. Alistair Ward [@DESCRIPTION@] Miscellaneous polymorphic list-operations. -} module ToolShed.Data.List( -- * Types -- ** Type-synonyms ChunkLength, Matches, -- * Functions chunk, excise, equalityBy, findConvergence, findConvergenceBy, interleave, linearise, measureJaroDistance, merge, mergeBy, nub', -- nubWithInt, permutations, permutationsBy, takeUntil, showListWith, -- showListWithChar, -- showListWithString ) where import qualified Control.Arrow import Control.Arrow((&&&),(***)) import qualified Data.IntSet import qualified Data.List import qualified Data.Set import qualified Data.Tuple -- | The length of the chunks into which a list is split. type ChunkLength = Int {- | * Splits a list into chunks of the specified length. * The last chunk will be shorter, if the chunk-length isn't an aliquot part of the input list-length. * If the chunk-length is zero, the resulting list will be an infinite sequence of null lists. * CAVEAT: a similar function is available in the module /Data.List.Split/, though this one checks for @(chunkLength < 0)@. -} chunk :: ChunkLength -> [a] -- ^ The polymorphic input list to be chunked. -> [[a]] chunk size list | size < 0 = error $ "ToolShed.Data.List.chunk:\tnegative chunk-size=" ++ show size | otherwise = chunk' list where chunk' :: [a] -> [[a]] chunk' [] = [] chunk' a = uncurry (:) . Control.Arrow.second chunk' $ splitAt size a -- | Remove the single indexed element from the list. excise :: Int -- ^ The index. -> [a] -- ^ The polymorphic input list. -> [a] -- ^ The same list, with the indexed element removed. excise 0 = tail -- Just for efficiency. excise i = uncurry (++) . Control.Arrow.second tail . splitAt i -- | The type of function required by 'findConvergenceBy', 'permutationsBy'. type Matches a = a -> a -> Bool -- | A convenient way to compose the 'Matches'-function required by 'findConvergenceBy' & 'permutationsBy'. equalityBy :: Eq b => (a -> b) -> Matches a equalityBy f x y = f x == f y -- | Take the first element from the (potentially infinite) list, which matches the subsequent element, according to the specified function. findConvergenceBy :: Matches a -> [a] -> a findConvergenceBy _ [] = error "ToolShed.Data.List.findConvergenceBy:\ta null list is too short for convergence to exist" findConvergenceBy _ [_] = error "ToolShed.Data.List.findConvergenceBy:\ta singleton list is too short for convergence to exist" findConvergenceBy matches l | null l' = error "ToolShed.Data.List.findConvergenceBy:\tno convergence found" | otherwise = fst $ head l' where l' = dropWhile (not . uncurry matches) . uncurry zip $ (init &&& tail) l -- | A specific instance of 'findConvergenceBy'. findConvergence :: Eq a => [a] -> a findConvergence = findConvergenceBy (==) {- | * The list of all permutations, generated by selecting any one datum from each sub-list in turn, from the specified list of lists. * As each item is selected, the remaining lists are filtered according to the specified 'Matches'-function. * Thus '/=' could be used to select a different item from each list. -} permutationsBy :: Matches a -> [[a]] -> [[a]] permutationsBy matches lists | any null lists = [] -- Required for efficiency, to catch the case [bigList1, bigList2 ... null] | otherwise = slave lists where slave (xs : xss) = [x : xs' | x <- xs, xs' <- slave $ map (filter $ matches x) xss] slave [] = [[]] {- | * The list of all permutations, generated by selecting any one datum from each sub-list in turn, from the specified list of lists. * A specific instance of 'permutationsBy', in which no filtering of subsequent lists is performed after each item is selected. * N.B.: differs from 'Data.List.permutations', which selects items from a single input list. -} permutations :: [[a]] -> [[a]] permutations = permutationsBy (\_ _ -> True) {-# NOINLINE nub' #-} {-# RULES "nub'/Int" nub' = nubWithInt #-} {- | * A strict version of 'Data.List.nub' with better time-complexity. * CAVEAT: the specified list must be finite, since the entire set is constructed before streaming to a list. * CAVEAT: it sorts the output as a side-effect, & consequently it requires a type which implements 'Ord'. -} nub' :: Ord a => [a] -> [a] nub' = Data.Set.toList . Data.Set.fromList -- | A specialisation for type 'Int'. nubWithInt :: [Int] -> [Int] nubWithInt = Data.IntSet.toList . Data.IntSet.fromList -- | Converts a list of /Pairs/, into a narrower list. linearise :: [(a, a)] -> [a] linearise [] = [] linearise ((l, r) : remainder) = l : r : linearise remainder -- Recurse. -- | Interleaves the specified lists, taking the first item from the first list. interleave :: [a] -> [a] -> [a] interleave (x : xs) ys = x : interleave ys xs interleave _ ys = ys {- | * Merge two sorted lists, according to the specified order, to product a single sorted list. * The merge-process is /stable/, in that where items from each list are equal, they remain in the original order. * CAVEAT: duplicates are preserved. -} mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] mergeBy cmp = slave where slave l@(x : xs) r@(y : ys) | x `cmp` y == GT = y : slave l ys | otherwise = x : slave xs r slave [] r = r slave l [] = l -- | A specific instance of 'mergeBy'. merge :: Ord a => [a] -> [a] -> [a] merge = mergeBy compare {- | * Take until the specified predicate is satisfied; /including/ the item which satisfied it. * N.B.: @takeWhile (not . test)@ would return one fewer item. -} takeUntil :: (a -> Bool) -- ^ Predicate, used to determine the last item taken. -> [a] -- ^ The polymorphic input list. -> [a] takeUntil predicate = takeUntil' where takeUntil' (x : xs) = x {-take regardless-} : if predicate x then [] else takeUntil' xs takeUntil' _ = [] -- | Show a list, delimited by the specified tokens. showListWith :: (Show token, Show element) => (token, token, token) -- ^ (Start-delimiter, separator, end-delimiter) -> [element] -- ^ The polymorphic list to show. -> ShowS showListWith (startDelimiter, separator, endDelimiter) = foldr (.) (shows endDelimiter) . (shows startDelimiter :) . Data.List.intersperse (shows separator) . map shows {-# NOINLINE showListWith #-} {-# RULES "showListWith/Char" showListWith = showListWithChar #-} {-# RULES "showListWith/String" showListWith = showListWithString #-} -- | A specialisation of 'showListWith'. showListWithChar :: Show element => (Char, Char, Char) -> [element] -> ShowS showListWithChar (startDelimiter, separator, endDelimiter) = foldr (.) (showChar endDelimiter) . (showChar startDelimiter :) . Data.List.intersperse (showChar separator) . map shows -- | A specialisation of 'showListWith'. showListWithString :: Show element => (String, String, String) -> [element] -> ShowS showListWithString (startDelimiter, separator, endDelimiter) = foldr (.) (showString endDelimiter) . (showString startDelimiter :) . Data.List.intersperse (showString separator) . map shows {- | * Measures the /distance/ between two lists (typically Strings). * The operation is /commutative/; it doesn't matter about the order of the arguments. * The result ranges from /0/ when they're completely dissimilar, to /1/ when identical. * <https://lingpipe-blog.com/2006/12/13/code-spelunking-jaro-winkler-string-comparison>. -} measureJaroDistance :: (Eq a, Fractional distance) => ([a], [a]) -> distance measureJaroDistance pair | uncurry (==) pair = 1 | nMatches == 0 = 0 -- Guard against divide-by-zero. | otherwise = sum [ fMatches / fromIntegral l, fMatches / fromIntegral l', 1 - fromIntegral ( foldr ( \cc -> if uncurry (==) cc then id else succ ) (0 :: Int) $ uncurry zip matchesPair -- Count transpositions; matches which occur in a different order. ) / ( 2 {-compensate for double counting-} * fMatches ) ] / 3 {-normalise: each component of the above sum is in the closed unit interval-} where l, l' :: Int (l, l') = length *** length $ pair findMatches :: Eq a => ([a], [a]) -> [a] findMatches = uncurry slave . (zip [0 ..] *** zip [0 ..]) where slave :: Eq a => [(Int, a)] -> [(Int, a)] -> [a] slave [] _ = [] -- There's nothing to match. slave _ [] = [] -- Ignore any remaining 'xs', since there's nothing against which to match. slave ((i, x) : xs) ys = case span ( uncurry (||) . ( (> pred (max l l' `div` 2)) . abs . (i -) *** (x /=) ) -- Reject either mismatches, or matches which occur outside a maximum separation. ) ys of (matchFailures, _ : untested) -> x : slave xs (matchFailures ++ untested) -- Remove the matched datum, so that it can't be matched against again. _ -> slave xs ys -- Failed to match 'x' in 'ys'. matchesPair@(matches, _) = findMatches &&& findMatches . Data.Tuple.swap $ pair nMatches = length matches fMatches = fromIntegral nMatches -- Translate to the return-type.