{- Utility
Gregory W. Schwartz

Collections all miscellaneous functions.
-}

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}

module Utility
    ( minMaxNorm
    , subsetIMap
    , imapToVec
    , imapMatToMat
    , lookupWithError
    , largestLeftEig
    , (/.)
    , removeMatchFilter
    , applyRows
    , avgVec
    , avgVecVec
    , getVertexSim
    , pairs
    , pairsM
    , parPairsM
    , triples
    , flipToo
    , listToTuple
    , sameWithEntityDiff
    , groupDataSets
    , fillIntMap
    , getAccuracy
    , nub'
    ) where

-- Standard
import Data.Maybe
import Data.List
import qualified Data.Set as Set
import qualified Data.Map.Strict as Map
import qualified Data.IntMap.Strict as IMap
import Data.Function (on)
import Control.Concurrent.Async

-- Cabal
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Storable as VS
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.Text as T
import Data.ByteString.Lazy.Search (replace)
import Data.Graph.Inductive
import qualified Data.Aeson as JSON
import Control.Lens
import Numeric.LinearAlgebra

-- Local
import Types

-- | Min max normalize.
minMaxNorm :: [Double] -> [Double]
minMaxNorm xs = fmap (\x -> (x - minimum xs) / (maximum xs - minimum xs)) xs

-- | Get a subset of indices of an IntMap.
subsetIMap :: [Int] -> IMap.IntMap a -> IMap.IntMap a
subsetIMap idx = flip IMap.intersection (IMap.fromList . zip idx $ [0..])

-- | Convert an IntMap to a vector with a filler.
imapToVec :: (VG.Vector v a, Show a) => Size -> a -> IMap.IntMap a -> v a
imapToVec (Size size) filler =
    (VG.//) (VG.replicate size filler) . IMap.toAscList
    
-- | Convert an IntMap of IntMap to a square matrix with a filler.
imapMatToMat :: Size
             -> Double
             -> IMap.IntMap (IMap.IntMap Double)
             -> Matrix Double
imapMatToMat (Size size) filler =
    assoc (size, size) filler
        . concatMap (\ (!row, !v)
                    -> fmap (\ (!col, !v)
                            -> ((row, col), v)
                            ) . IMap.toAscList $ v
                    )
        . IMap.toAscList

-- | Map lookup with a custom error if the value is not found.
lookupWithError :: (Ord a) => String -> a -> Map.Map a b -> b
lookupWithError err x = fromMaybe (error err) . Map.lookup x

-- | Get the largest left eigenvector from an eig funciton call. The matrix, a
-- transition probability matrix in this program, is assumed to be symmetrical
-- here.
largestLeftEig :: (VS.Vector (Complex Double), Matrix (Complex Double))
           -> VS.Vector Double
largestLeftEig (!eigVal, !eigVec) =
    fst . fromComplex $ (tr eigVec ! maxIndex eigVal)

-- | A more generic division.
(/.) :: (Real a, Fractional c) => a -> a -> c
(/.) x y = fromRational $ toRational x / toRational y

-- | Remove indices matching a boolean function from both vectors but make
-- sure that the indices match. NOT NEEDED WITH COSINE.
removeMatchFilter :: (VS.Storable a)
                  => (a -> Bool)
                  -> Vector a
                  -> Vector a
                  -> (Vector a, Vector a)
removeMatchFilter f xs = over _2 VS.convert
                       . over _1 VS.convert
                       . V.unzip
                       . filterBad (V.convert xs)
                       . V.convert
  where
    filterBad x = V.filter (\(!a, !b) -> (not . f $ a) && (not . f $ b))
                . V.zip x

-- | Apply a folding function to a list of row vectors.
applyRows :: (Element a, VS.Storable b)
          => (Vector a -> b)
          -> [Vector a]
          -> Vector b
applyRows f = fromList . fmap f . toColumns . fromRows

-- | Average of a vector.
avgVec :: (Num a, Fractional a, VG.Vector v a) => v a -> a
avgVec xs = VG.sum xs / (fromIntegral $ VG.length xs)

-- | Average entries of a list of vectors.
avgVecVec :: (Num a, Fractional a, VG.Vector v a) => [v a] -> v a
avgVecVec xs = VG.map (/ genericLength xs)
             . foldl1' (VG.zipWith (+))
             $ xs

-- | Get the vertex similarity matrix for two levels, erroring out if the
-- levels don't exist.
getVertexSim :: LevelName -> LevelName -> VertexSimMap -> VertexSimValues
getVertexSim l1 l2 (VertexSimMap vMap) =
    case Map.lookup l1 vMap of
        Nothing  -> fromMaybe (error $ levelErr l1)
                  . maybe (error $ levelErr l2) (Map.lookup l1)
                  . Map.lookup l2
                  $ vMap
        (Just x) -> lookupWithError (error $ levelErr l2) l2
                  $ x
  where
    levelErr l    = ( "Level: "
                   ++ (T.unpack $ unLevelName l)
                   ++ " not found in creating vertex similarity map."
                    )

-- | From
-- http://stackoverflow.com/questions/34044366/how-to-extract-all-unique-pairs-of-a-list-in-haskell,
-- extract the unique pairings of a list and apply a function to them.
pairs :: (a -> a -> b) -> [a] -> [b]
pairs f l = [f x y | (x:ys) <- tails l, y <- ys]

-- | Extract the unique pairings of a list and apply a function to them within a
-- monad.
pairsM :: (Monad m) => (a -> a -> m b) -> [a] -> m [b]
pairsM f l = sequence [f x y | (x:ys) <- tails l, y <- ys]

-- | Extract the unique pairings of a list and apply a function to them within a
-- monad. Parallel version.
parPairsM :: (a -> a -> IO b) -> [a] -> IO [b]
parPairsM _ [] = return []
parPairsM f l  = mapConcurrently (uncurry evalM) . concatMap go . tails $ l
  where
    go [] = []
    go [_] = []
    go (x:ys) = fmap (x,) ys
    evalM !x !y = do
        !res <- f x y
        return res

-- | Extract the unique triplets of a list and apply a function to them.
triples :: (a -> a -> a -> b) -> [a] -> [b]
triples f l = [f x y z | (x:ys) <- tails l, (y:zs) <- tails ys, z <- zs]

-- | Take a tuple index with a value and return it with its flip.
flipToo :: ((a, a), b) -> [((a, a), b)]
flipToo all@((!x, !y), !z) = [all, ((y, x), z)]

-- | Convert a list to a tuple.
listToTuple :: (Show a) => [a] -> (a, a)
listToTuple [!x, !y] = (x, y)
listToTuple (x:_)    = error ("Wrong pairing for " ++ show x)

-- | Check if two entities are actually the same if one contains the entityDiff
-- while the other does not.
sameWithEntityDiff :: Maybe EntityDiff -> ID -> ID -> Bool
sameWithEntityDiff Nothing (ID e1) (ID e2)                           = False
sameWithEntityDiff (Just (EntityDiff eDiff)) (ID e1) (ID e2)
    | T.count eDiff e1 == 0 && T.count eDiff e2 == 0                 = False
    | T.count eDiff e1 > 0 && T.count eDiff e2 > 0                   = False
    | (head . T.splitOn eDiff $ e1) == (head . T.splitOn eDiff $ e2) = True
    | otherwise                                                      = False

-- | Group together data sets.
groupDataSets :: Maybe Entity -> Maybe Entity -> Maybe (Double, Double)
groupDataSets Nothing _           = Nothing
groupDataSets _ Nothing           = Nothing
groupDataSets (Just !x) (Just !y) = Just (_entityValue x, _entityValue y)

-- | Fill in an intmap with zeros for missing values.
fillIntMap :: Size
           -> IMap.IntMap (Double, Maybe Statistic)
           -> IMap.IntMap (Double, Maybe Statistic)
fillIntMap (Size size) m =
    foldl'
        (\acc x -> IMap.alter (maybe (Just (0, Nothing)) Just) x acc)
        m
        [0..size - 1]

-- | Rank the node correspondence scores.
rankNodeCorrScores :: IDVec -> NodeCorrScores -> [(Double, ID)]
rankNodeCorrScores (IDVec idVec) = zip [1..]
                                 . fmap fst
                                 . sortBy (compare `on` snd)
                                 . V.toList
                                 . V.imap (\ !i !v -> (idVec V.! i, v))
                                 . VS.convert
                                 . fmap fst
                                 . unNodeCorrScores

-- | Get the accuracy of a run. In this case, we get the total rank below the
-- number of permuted vertices divided by the theoretical maximum (so if there were
-- five changed vertices out off 10 and two were rank 8 and 10 while the others
-- were in the top five, we would have (1 - ((3 + 5) / (10 + 9 + 8 + 7 + 6))) as
-- the accuracy."
getAccuracy :: Set.Set ID -> IDVec -> V.Vector NodeCorrScoresInfo -> Double
getAccuracy truth (IDVec idVec) =
    (1 -)
        . (/ fact)
        . sum
        . filter (> 0)
        . fmap ( flip
                    (-)
                    (fromIntegral . Set.size $ truth)
               . fst
               )
        . filter (flip Set.member truth . snd)
        . rankNodeCorrScores (IDVec idVec)
        . NodeCorrScores
        . fmap ((, Nothing) . fromJust . avgNodeCorrScores)
  where
    fact = fromIntegral
         . sum
         . fmap ((V.length idVec - length truth) -)
         $ [0..(length truth - 1)]

-- | Faster nub.
nub' :: (Eq a, Ord a) => [a] -> [a]
nub' = Set.toList . Set.fromList