{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -- | Efficient combinatorial algorithms over multisets, including -- generating all permutations, partitions, subsets, cycles, and -- other combinatorial structures based on multisets. Note that an -- 'Eq' or 'Ord' instance on the elements is /not/ required; the -- algorithms are careful to keep track of which things are (by -- construction) equal to which other things, so equality testing is -- not needed. module Math.Combinatorics.Multiset ( -- * The 'Multiset' type Count , Multiset(..) , emptyMS, singletonMS , consMS, (+:) -- ** Conversions , toList , fromList , fromListEq , fromDistinctList , fromCounts , getCounts , size -- ** Operations , disjUnion , disjUnions -- * Permutations , permutations , permutationsRLE -- * Partitions , Vec , vPartitions , partitions -- * Submultisets , splits , kSubsets -- * Cycles and bracelets , cycles , bracelets , genFixedBracelets -- * Miscellaneous , sequenceMS ) where import Control.Arrow (first, second, (&&&), (***)) import Control.Monad (forM_, when) import Control.Monad.Trans.Writer import qualified Data.IntMap.Strict as IM import Data.List (group, partition, sort) import Data.Maybe (catMaybes, fromJust) type Count = Int -- | A multiset is represented as a list of (element, count) pairs. -- We maintain the invariants that the counts are always positive, -- and no element ever appears more than once. newtype Multiset a = MS { toCounts :: [(a, Count)] } deriving (Show, Functor) -- | Construct a 'Multiset' from a list of (element, count) pairs. -- Precondition: the counts must all be positive, and there must not -- be any duplicate elements. fromCounts :: [(a, Count)] -> Multiset a fromCounts = MS -- | Extract just the element counts from a multiset, forgetting the -- elements. getCounts :: Multiset a -> [Count] getCounts = map snd . toCounts -- | Compute the total size of a multiset. size :: Multiset a -> Int size = sum . getCounts liftMS :: ([(a, Count)] -> [(b, Count)]) -> Multiset a -> Multiset b liftMS f (MS m) = MS (f m) -- | A multiset with no values in it. emptyMS :: Multiset a emptyMS = MS [] -- | Create a multiset with only a single value in it. singletonMS :: a -> Multiset a singletonMS a = MS [(a,1)] -- | Add an element with multiplicity to a multiset. Precondition: -- the new element is distinct from all elements already in the -- multiset. consMS :: (a, Count) -> Multiset a -> Multiset a consMS e@(_,c) (MS m) | c > 0 = MS (e:m) | otherwise = MS m -- | A convenient shorthand for 'consMS'. (+:) :: (a, Count) -> Multiset a -> Multiset a (+:) = consMS -- | Convert a multiset to a list. toList :: Multiset a -> [a] toList = expandCounts . toCounts expandCounts :: [(a, Count)] -> [a] expandCounts = concatMap (uncurry (flip replicate)) -- | Efficiently convert a list to a multiset, given an 'Ord' instance -- for the elements. This method is provided just for convenience. -- you can also use 'fromListEq' with only an 'Eq' instance, or -- construct 'Multiset's directly using 'fromCounts'. fromList :: Ord a => [a] -> Multiset a fromList = fromCounts . map (head &&& length) . group . sort -- | Convert a list to a multiset, given an 'Eq' instance for the -- elements. fromListEq :: Eq a => [a] -> Multiset a fromListEq = fromCounts . fromListEq' where fromListEq' [] = [] fromListEq' (x:xs) = (x, 1 + length xEqs) : fromListEq' xNeqs where (xEqs, xNeqs) = partition (==x) xs -- | Make a multiset with one copy of each element from a list of -- distinct elements. fromDistinctList :: [a] -> Multiset a fromDistinctList = fromCounts . map (\x -> (x,1)) -- | Form the disjoint union of two multisets; i.e. we assume the two -- multisets share no elements in common. disjUnion :: Multiset a -> Multiset a -> Multiset a disjUnion (MS xs) (MS ys) = MS (xs ++ ys) -- | Form the disjoint union of a collection of multisets. We assume -- that the multisets all have distinct elements. disjUnions :: [Multiset a] -> Multiset a disjUnions = foldr disjUnion (MS []) -- | In order to generate permutations of a multiset, we need to keep -- track of the most recently used element in the permutation being -- built, so that we don't use it again immediately. The -- 'RMultiset' type (for \"restricted multiset\") records this -- information, consisting of a multiset possibly paired with an -- element (with multiplicity) which is also part of the multiset, -- but should not be used at the beginning of permutations. data RMultiset a = RMS (Maybe (a, Count)) [(a,Count)] deriving Show -- | Convert a 'Multiset' to a 'RMultiset' (with no avoided element). toRMS :: Multiset a -> RMultiset a toRMS = RMS Nothing . toCounts -- | Convert a 'RMultiset' to a 'Multiset'. fromRMS :: RMultiset a -> Multiset a fromRMS (RMS Nothing m) = MS m fromRMS (RMS (Just e) m) = MS (e:m) -- | List all the distinct permutations of the elements of a -- multiset. -- -- For example, @permutations (fromList \"abb\") == -- [\"abb\",\"bba\",\"bab\"]@, whereas @Data.List.permutations -- \"abb\" == [\"abb\",\"bab\",\"bba\",\"bba\",\"bab\",\"abb\"]@. -- This function is equivalent to, but /much/ more efficient than, -- @nub . Data.List.permutations@, and even works when the elements -- have no 'Eq' instance. -- -- Note that this is a specialized version of 'permutationsRLE', -- where each run has been expanded via 'replicate'. permutations :: Multiset a -> [[a]] permutations = map expandCounts . permutationsRLE -- | List all the distinct permutations of the elements of a multiset, -- with each permutation run-length encoded. (Note that the -- run-length encoding is a natural byproduct of the algorithm used, -- not a separate postprocessing step.) -- -- For example, @permutationsRLE [('a',1), ('b',2)] == -- [[('a',1),('b',2)],[('b',2),('a',1)],[('b',1),('a',1),('b',1)]]@. -- -- (Note that although the output type is newtype-equivalent to -- @[Multiset a]@, we don't call it that since the output may -- violate the 'Multiset' invariant that no element should appear -- more than once. And indeed, morally this function does not -- output multisets at all.) permutationsRLE :: Multiset a -> [[(a,Count)]] permutationsRLE (MS []) = [[]] permutationsRLE m = permutationsRLE' (toRMS m) -- | List all the (run-length encoded) distinct permutations of the -- elements of a multiset which do not start with the element to -- avoid (if any). permutationsRLE' :: RMultiset a -> [[(a,Count)]] -- If only one element is left, there's only one permutation. permutationsRLE' (RMS Nothing [(x,n)]) = [[(x,n)]] -- Otherwise, select an element+multiplicity in all possible ways, and -- concatenate the elements to all possible permutations of the -- remaining multiset. permutationsRLE' m = [ e : p | (e, m') <- selectRMS m , p <- permutationsRLE' m' ] -- | Select an element + multiplicity from a multiset in all possible -- ways, appropriately keeping track of elements to avoid at the -- start of permutations. selectRMS :: RMultiset a -> [((a, Count), RMultiset a)] -- No elements to select. selectRMS (RMS _ []) = [] -- Selecting from a multiset with n copies of x, avoiding e: selectRMS (RMS e ((x,n) : ms)) = -- If we select all n copies of x, there are no copies of x left to avoid; -- stick e (if it exists) back into the remaining multiset. ((x,n), RMS Nothing (maybe ms (:ms) e)) : -- We can also select any number of copies of x from (n-1) down to 1; in each case, -- we avoid the remaining copies of x and put e back into the returned multiset. [ ( (x,k), RMS (Just (x,n-k)) (maybe ms (:ms) e) ) | k <- [n-1, n-2 .. 1] ] ++ -- Finally, we can recursively choose something other than x. map (second (consRMS (x,n))) (selectRMS (RMS e ms)) consRMS :: (a, Count) -> RMultiset a -> RMultiset a consRMS x (RMS e m) = RMS e (x:m) -- Some QuickCheck properties. Of course, due to combinatorial -- explosion these are of limited utility! -- newtype ArbCount = ArbCount Int -- deriving (Eq, Show, Num, Real, Enum, Ord, Integral) -- instance Arbitrary Count where -- arbitrary = elements (map ArbCount [1..3]) -- prop_perms_distinct :: Multiset Char ArbCount -> Bool -- prop_perms_distinct m = length ps == length (nub ps) -- where ps = permutations m -- prop_perms_are_perms :: Multiset Char ArbCount -> Bool -- prop_perms_are_perms m = all ((==l') . sort) (permutations m) -- where l' = sort (toList m) --------------------- -- Partitions --------------------- -- | Element count vector. type Vec = [Count] -- | Componentwise comparison of count vectors. (<|=) :: Vec -> Vec -> Bool xs <|= ys = and $ zipWith (<=) xs ys -- | 'vZero v' produces a zero vector of the same length as @v@. vZero :: Vec -> Vec vZero = map (const 0) -- | Test for the zero vector. vIsZero :: Vec -> Bool vIsZero = all (==0) -- | Do vector arithmetic componentwise. (.+.), (.-.) :: Vec -> Vec -> Vec (.+.) = zipWith (+) (.-.) = zipWith (-) -- | Multiply a count vector by a scalar. (*.) :: Count -> Vec -> Vec (*.) n = map (n*) -- | 'v1 `vDiv` v2' is the largest scalar multiple of 'v2' which is -- elementwise less than or equal to 'v1'. vDiv :: Vec -> Vec -> Count vDiv v1 v2 = minimum . catMaybes $ zipWith zdiv v1 v2 where zdiv _ 0 = Nothing zdiv x y = Just $ x `div` y -- | 'vInc within v' lexicographically increments 'v' with respect to -- 'within'. For example, @vInc [2,3,5] [1,3,4] == [1,3,5]@, and -- @vInc [2,3,5] [1,3,5] == [2,0,0]@. vInc :: Vec -> Vec -> Vec vInc lim v = reverse (vInc' (reverse lim) (reverse v)) where vInc' _ [] = [] vInc' [] (x:xs) = x+1 : xs vInc' (l:ls) (x:xs) | x < l = x+1 : xs | otherwise = 0 : vInc' ls xs -- | Generate all vector partitions, representing each partition as a -- multiset of vectors. -- -- This code is a slight generalization of the code published in -- -- Brent Yorgey. \"Generating Multiset Partitions\". In: The -- Monad.Reader, Issue 8, September 2007. -- <http://www.haskell.org/sitewiki/images/d/dd/TMR-Issue8.pdf> -- -- See that article for a detailed discussion of the code and how it works. vPartitions :: Vec -> [Multiset Vec] vPartitions v = vPart v (vZero v) where vPart v _ | vIsZero v = [MS []] vPart v vL | v <= vL = [] | otherwise = MS [(v,1)] : [ (v',k) +: p' | v' <- withinFromTo v (vHalf v) (vInc v vL) , k <- [1 .. (v `vDiv` v')] , p' <- vPart (v .-. (k *. v')) v' ] -- | 'vHalf v' computes the \"lexicographic half\" of 'v', that is, -- the vector which is the middle element (biased towards the end) -- in a lexicographically decreasing list of all the vectors -- elementwise no greater than 'v'. vHalf :: Vec -> Vec vHalf [] = [] vHalf (x:xs) | (even x) = (x `div` 2) : vHalf xs | otherwise = (x `div` 2) : xs downFrom n = [n,(n-1)..0] -- | 'within m' generates a lexicographically decreasing list of -- vectors elementwise no greater than 'm'. within :: Vec -> [Vec] within = sequence . map downFrom -- | Clip one vector against another. clip :: Vec -> Vec -> Vec clip = zipWith min -- | 'withinFromTo m s e' efficiently generates a lexicographically -- decreasing list of vectors which are elementwise no greater than -- 'm' and lexicographically between 's' and 'e'. withinFromTo :: Vec -> Vec -> Vec -> [Vec] withinFromTo m s e | not (s <|= m) = withinFromTo m (clip m s) e withinFromTo m s e | e > s = [] withinFromTo m s e = wFT m s e True True where wFT [] _ _ _ _ = [[]] wFT (m:ms) (s:ss) (e:es) useS useE = let start = if useS then s else m end = if useE then e else 0 in [x:xs | x <- [start,(start-1)..end], let useS' = useS && x==s, let useE' = useE && x==e, xs <- wFT ms ss es useS' useE' ] -- | Efficiently generate all distinct multiset partitions. Note that -- each partition is represented as a multiset of parts (each of -- which is a multiset) in order to properly reflect the fact that -- some parts may occur multiple times. partitions :: Multiset a -> [Multiset (Multiset a)] partitions (MS []) = [MS []] partitions (MS m) = (map . fmap) (combine elts) $ vPartitions counts where (elts, counts) = unzip m combine es cs = MS . filter ((/=0) . snd) $ zip es cs -- | Generate all splittings of a multiset into two submultisets, -- i.e. all size-two partitions. splits :: Multiset a -> [(Multiset a, Multiset a)] splits (MS []) = [(MS [], MS [])] splits (MS ((x,n):m)) = for [0..n] $ \k -> map (addElt x k *** addElt x (n-k)) (splits (MS m)) -- | Generate all size-k submultisets. kSubsets :: Count -> Multiset a -> [Multiset a] kSubsets 0 _ = [MS []] kSubsets _ (MS []) = [] kSubsets k (MS ((x,n):m)) = for [0 .. min k n] $ \j -> map (addElt x j) (kSubsets (k - j) (MS m)) for = flip concatMap addElt _ 0 = id addElt x k = ((x,k) +:) ---------------------------------------------------------------------- -- Cycles (aka Necklaces) ---------------------------------------------------------------------- -- | Generate all distinct cycles, aka necklaces, with elements taken -- from a multiset. See J. Sawada, \"A fast algorithm to generate -- necklaces with fixed content\", J. Theor. Comput. Sci. 301 (2003) -- pp. 477-489. -- -- Given the ordering on the elements of the multiset based on their -- position in the multiset representation (with \"smaller\" -- elements first), in @map reverse (cycles m)@, each generated -- cycle is lexicographically smallest among all its cyclic shifts, -- and furthermore, the cycles occur in reverse lexicographic -- order. (It's simply more convenient/efficient to generate the -- cycles reversed in this way, and of course we get the same set of -- cycles either way.) -- -- For example, @cycles (fromList \"aabbc\") == -- [\"cabba\",\"bcaba\",\"cbaba\",\"bbcaa\",\"bcbaa\",\"cbbaa\"]@. cycles :: Multiset a -> [[a]] cycles (MS []) = [] -- no such thing as an empty cycle cycles m@(MS ((x1,n1):xs)) | n1 == 1 = (cycles' n 2 1 [(0,x1)] (reverse $ zip [1..] xs)) | otherwise = (cycles' n 2 1 [(0,x1)] (reverse $ zip [0..] ((x1,n1-1):xs))) where n = sum . getCounts $ m -- | The first parameter is the length of the necklaces being -- generated. The second parameter @p@ is the length of the longest -- prefix of @pre@ which is a Lyndon word, i.e. an aperiodic -- necklace. @pre@ is the current (reversed) prefix of the -- necklaces being generated. cycles' :: Int -> Int -> Int -> [(Int, a)] -> [(Int, (a,Count))] -> [[a]] cycles' n _ p pre [] | n `mod` p == 0 = [map snd pre] | otherwise = [] cycles' n t p pre xs = (takeWhile ((>=atp) . fst) xs) >>= \(j, (xj,_)) -> cycles' n (t+1) (if j == atp then p else t) ((j,xj):pre) (remove j xs) where atp = fst $ pre !! (p - 1) remove :: Int -> [(Int, (a, Int))] -> [(Int, (a, Int))] remove _ [] = [] remove j (x@(j',(xj,nj)):xs) | j == j' && nj == 1 = xs | j == j' = (j',(xj,nj-1)):xs | otherwise = x:remove j xs ---------------------------------------------------------------------- -- Bracelets ---------------------------------------------------------------------- -- Some utilities -------------------------------------------------- -- Indexable and Snocable classes class Snocable p a where (|>) :: p -> a -> p -- 1-based indexing class Indexable p where (!) :: p -> Int -> Int -------------------------------------------------- -- Prenecklaces type PreNecklace = [Int] -- A prenecklace, stored backwards, along with its length and its -- first element cached for quick retrieval. data Pre = Pre !Int (Maybe Int) PreNecklace deriving (Show) emptyPre :: Pre emptyPre = Pre 0 Nothing [] getPre :: Pre -> PreNecklace getPre (Pre _ _ as) = reverse as instance Snocable Pre Int where (Pre 0 _ []) |> a = Pre 1 (Just a) [a] (Pre t a1 as) |> a = Pre (t+1) a1 (a:as) instance Indexable Pre where _ ! 0 = 0 (Pre _ (Just a1) _) ! 1 = a1 (Pre t _ as) ! i = as !! (t-i) -- as stores a_t .. a_1. -- a_1 is the last element, i.e. with index t-1. -- a_2 has index t-2. -- In general, a_i has index t-i. -------------------------------------------------- -- Run-length encoding -- Run-length encodings. Stored in *reverse* order for easy access to -- the end. data RLE a = RLE !Int !Int [(a,Int)] deriving (Show) -- First Int is the total length of the decoded list. -- Second Int is the number of blocks. emptyRLE :: RLE a emptyRLE = RLE 0 0 [] compareRLE :: Ord a => [(a,Int)] -> [(a,Int)] -> Ordering compareRLE [] [] = EQ compareRLE [] _ = LT compareRLE _ [] = GT compareRLE ((a1,n1):rle1) ((a2,n2):rle2) | (a1,n1) == (a2,n2) = compareRLE rle1 rle2 | a1 < a2 = LT | a1 > a2 = GT | (n1 < n2 && (null rle1 || fst (head rle1) < a2)) || (n1 > n2 && not (null rle2) && a1 < fst (head rle2)) = LT | otherwise = GT instance Indexable (RLE Int) where (RLE _ _ []) ! _ = error "Bad index in (!) for RLE" (RLE n b ((a,v):rest)) ! i | i <= v = a | otherwise = (RLE (n-v) (b-1) rest) ! (i-v) instance Eq a => Snocable (RLE a) a where (RLE _ _ []) |> a' = RLE 1 1 [(a',1)] (RLE n b rle@((a,v):rest)) |> a' | a == a' = RLE (n+1) b ((a,v+1):rest) | otherwise = RLE (n+1) (b+1) ((a',1):rle) -------------------------------------------------- -- Prenecklaces + RLE -- Prenecklaces along with a run-length encoding. data Pre' = Pre' Pre (RLE Int) deriving Show emptyPre' :: Pre' emptyPre' = Pre' emptyPre emptyRLE getPre' :: Pre' -> PreNecklace getPre' (Pre' pre _) = getPre pre instance Indexable Pre' where _ ! 0 = 0 (Pre' (Pre len _ _) rle) ! i = rle ! (len - i + 1) instance Snocable Pre' Int where (Pre' p rle) |> a = Pre' (p |> a) (rle |> a) -------------------------------------------------- -- Bracelet generation type Bracelet = [Int] -- | An optimized bracelet generation algorithm, based on -- S. Karim et al, "Generating Bracelets with Fixed Content". -- <http://www.cis.uoguelph.ca/~sawada/papers/fix-brace.pdf> -- -- @genFixedBracelets n content@ produces all bracelets (unique up -- to rotation and reflection) of length @n@ using @content@, which -- consists of a list of pairs where the pair (a,i) indicates that -- element a may be used up to i times. It is assumed that the elements -- are drawn from [0..k]. genFixedBracelets :: Int -> [(Int,Int)] -> [Bracelet] genFixedBracelets n [(0,k)] | k >= n = [replicate k 0] | otherwise = [] genFixedBracelets n content = execWriter (go 1 1 0 (IM.fromList content) emptyPre') where go :: Int -> Int -> Int -> IM.IntMap Int -> Pre' -> Writer [Bracelet] () go _ _ _ con _ | IM.keys con == [0] = return () go t p r con pre@(Pre' (Pre _ _ as) _) | t > n = when (take (n - r) as >= reverse (take (n-r) as) && n `mod` p == 0) $ tell [getPre' pre] | otherwise = do let a' = pre ! (t-p) forM_ (dropWhile (< a') $ IM.keys con) $ \j -> do let con' = decrease j con pre' = pre |> j c = checkRev2 t pre' p' | j /= a' = t | otherwise = p when (c == EQ) $ go (t+1) p' t con' pre' when (c == GT) $ go (t+1) p' r con' pre' decrease :: Int -> IM.IntMap Int -> IM.IntMap Int decrease j con | IM.null con = con | otherwise = IM.alter q j con where q (Just 1) = Nothing q (Just cnt) = Just (cnt-1) q _ = Nothing checkRev2 _ (Pre' _ (RLE _ _ rle)) = compareRLE rle (reverse rle) -- | Generate all distinct bracelets (lists considered equivalent up -- to rotation and reversal) from a given multiset. The generated -- bracelets are in lexicographic order, and each is -- lexicographically smallest among its rotations and reversals. -- See @genFixedBracelets@ for a slightly more general routine with -- references. -- -- For example, @bracelets $ fromList \"RRRRRRRLLL\"@ yields -- -- > ["LLLRRRRRRR","LLRLRRRRRR","LLRRLRRRRR","LLRRRLRRRR" -- > ,"LRLRLRRRRR","LRLRRLRRRR","LRLRRRLRRR","LRRLRRLRRR"] bracelets :: Multiset a -> [[a]] bracelets ms@(MS cnts) = bs where contentMap = IM.fromList (zip [0..] (map fst cnts)) content = zipWith (\i (_,n) -> (i,n)) [0..] cnts rawBs = genFixedBracelets (size ms) content bs = map (map (fromJust . flip IM.lookup contentMap)) rawBs ---------------------------------------------------------------------- -- sequenceMS ---------------------------------------------------------------------- -- | Take a multiset of lists, and select one element from each list -- in every possible combination to form a list of multisets. We -- assume that all the list elements are distinct. sequenceMS :: Multiset [a] -> [Multiset a] sequenceMS = map disjUnions . sequence . map (\(xs, n) -> kSubsets n (MS $ uncollate (xs, n))) . toCounts uncollate :: ([a], Count) -> [(a, Count)] uncollate (xs, n) = map (\x -> (x,n)) xs