{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}

-- |
-- Module      :  ELynx.Tools.Reproduction
-- Description :  Functions to ease reproduction of analyses
-- Copyright   :  (c) Dominik Schrempf 2021
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Tue Nov 19 15:07:09 2019.
--
-- Use of standard input is not supported.
module ELynx.Tools.Reproduction
  ( -- * Reproduction
    SeedOpt (..),
    Reproducible (..),
    getReproductionHash,
    Reproduction (..),
    writeReproduction,
    hashFile,

    -- * Re-exports
    Generic,
    FromJSON,
    ToJSON,
  )
where

import Control.Monad
import Crypto.Hash.SHA256
import Data.Aeson hiding (encode)
import Data.ByteString.Base16
import qualified Data.ByteString.Char8 as BS
import qualified Data.Vector.Unboxed as VU
import Data.Version
import Data.Word
import GHC.Generics
import Options.Applicative
import Paths_elynx_tools
import System.Environment

-- | Random or fixed seed.
data SeedOpt = RandomUnset | RandomSet (VU.Vector Word32) | Fixed (VU.Vector Word32)
  deriving (SeedOpt -> SeedOpt -> Bool
(SeedOpt -> SeedOpt -> Bool)
-> (SeedOpt -> SeedOpt -> Bool) -> Eq SeedOpt
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SeedOpt -> SeedOpt -> Bool
$c/= :: SeedOpt -> SeedOpt -> Bool
== :: SeedOpt -> SeedOpt -> Bool
$c== :: SeedOpt -> SeedOpt -> Bool
Eq, (forall x. SeedOpt -> Rep SeedOpt x)
-> (forall x. Rep SeedOpt x -> SeedOpt) -> Generic SeedOpt
forall x. Rep SeedOpt x -> SeedOpt
forall x. SeedOpt -> Rep SeedOpt x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SeedOpt x -> SeedOpt
$cfrom :: forall x. SeedOpt -> Rep SeedOpt x
Generic, Int -> SeedOpt -> ShowS
[SeedOpt] -> ShowS
SeedOpt -> String
(Int -> SeedOpt -> ShowS)
-> (SeedOpt -> String) -> ([SeedOpt] -> ShowS) -> Show SeedOpt
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SeedOpt] -> ShowS
$cshowList :: [SeedOpt] -> ShowS
show :: SeedOpt -> String
$cshow :: SeedOpt -> String
showsPrec :: Int -> SeedOpt -> ShowS
$cshowsPrec :: Int -> SeedOpt -> ShowS
Show)

instance FromJSON SeedOpt

instance ToJSON SeedOpt

-- | Reproducible commands have
--   - a set of input files to be checked for consistency,
--   - a set of output suffixes which define output files to be checked for consistency,
--   - a function to get the seed, if available,
--   - a function to set the seed, if applicable,
--   - a parser to read the command line,
--   - a nice program name, description, and footer.
class Reproducible a where
  inFiles :: a -> [FilePath]
  outSuffixes :: a -> [String]
  getSeed :: a -> Maybe SeedOpt
  setSeed :: a -> SeedOpt -> a
  parser :: Parser a
  cmdName :: String
  cmdDsc :: [String]
  cmdFtr :: [String]
  cmdFtr = []

-- | A unique hash of the reproduction data type.
getReproductionHash :: forall a. Reproducible a => Reproduction a -> String
getReproductionHash :: Reproduction a -> String
getReproductionHash Reproduction a
r =
  ByteString -> String
BS.unpack (ByteString -> String) -> ByteString -> String
forall a b. (a -> b) -> a -> b
$
    ByteString -> ByteString
encode (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$
      ByteString -> ByteString
hash (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$
        String -> ByteString
BS.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$
          [String] -> String
unlines ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$
            -- Reproduction.
            Reproduction a -> String
forall a. Reproduction a -> String
progName Reproduction a
r String -> [String] -> [String]
forall a. a -> [a] -> [a]
:
            Reproduction a -> [String]
forall a. Reproduction a -> [String]
argsStr Reproduction a
r
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> [Version -> String
showVersion (Reproduction a -> Version
forall a. Reproduction a -> Version
rVersion Reproduction a
r)]
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> Reproduction a -> [String]
forall a. Reproduction a -> [String]
files Reproduction a
r
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> Reproduction a -> [String]
forall a. Reproduction a -> [String]
checkSums Reproduction a
r
              -- Reproducible.
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> a -> [String]
forall a. Reproducible a => a -> [String]
inFiles a
ri
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> a -> [String]
forall a. Reproducible a => a -> [String]
outSuffixes a
ri
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> [Reproducible a => String
forall a. Reproducible a => String
cmdName @a]
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> Reproducible a => [String]
forall a. Reproducible a => [String]
cmdDsc @a
              [String] -> [String] -> [String]
forall a. Semigroup a => a -> a -> a
<> Reproducible a => [String]
forall a. Reproducible a => [String]
cmdFtr @a
  where
    ri :: a
ri = Reproduction a -> a
forall a. Reproduction a -> a
reproducible Reproduction a
r

setHash :: Reproducible a => Reproduction a -> Reproduction a
setHash :: Reproduction a -> Reproduction a
setHash Reproduction a
r = Reproduction a
r {rHash :: Maybe String
rHash = String -> Maybe String
forall a. a -> Maybe a
Just String
h} where h :: String
h = Reproduction a -> String
forall a. Reproducible a => Reproduction a -> String
getReproductionHash Reproduction a
r

-- | Necessary information for a reproducible run. Notably, the input files are
-- checked for consistency!
data Reproduction a = Reproduction
  { -- | Program name.
    Reproduction a -> String
progName :: String,
    -- | Command line arguments without program name.
    Reproduction a -> [String]
argsStr :: [String],
    Reproduction a -> Version
rVersion :: Version,
    -- | Unique hash; see 'getReproductionHash'.
    Reproduction a -> Maybe String
rHash :: Maybe String,
    -- | File paths of used files.
    Reproduction a -> [String]
files :: [FilePath],
    -- | SHA256 sums of used files.
    Reproduction a -> [String]
checkSums :: [String],
    -- | Command argument.
    Reproduction a -> a
reproducible :: a
  }
  deriving ((forall x. Reproduction a -> Rep (Reproduction a) x)
-> (forall x. Rep (Reproduction a) x -> Reproduction a)
-> Generic (Reproduction a)
forall x. Rep (Reproduction a) x -> Reproduction a
forall x. Reproduction a -> Rep (Reproduction a) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall a x. Rep (Reproduction a) x -> Reproduction a
forall a x. Reproduction a -> Rep (Reproduction a) x
$cto :: forall a x. Rep (Reproduction a) x -> Reproduction a
$cfrom :: forall a x. Reproduction a -> Rep (Reproduction a) x
Generic)

instance FromJSON a => FromJSON (Reproduction a)

instance ToJSON a => ToJSON (Reproduction a)

-- | Helper function.
hashFile :: FilePath -> IO BS.ByteString
hashFile :: String -> IO ByteString
hashFile String
f = ByteString -> ByteString
encode (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
hash (ByteString -> ByteString) -> IO ByteString -> IO ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BS.readFile String
f

-- | Write an ELynx reproduction file.
writeReproduction ::
  forall a.
  (Eq a, Show a, Reproducible a, ToJSON a) =>
  String ->
  a ->
  IO ()
writeReproduction :: String -> a -> IO ()
writeReproduction String
bn a
r = do
  String
pn <- IO String
getProgName
  [String]
as <- IO [String]
getArgs
  let outFs :: [String]
outFs = ShowS -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (String
bn String -> ShowS
forall a. [a] -> [a] -> [a]
++) (a -> [String]
forall a. Reproducible a => a -> [String]
outSuffixes a
r)
  let fs :: [String]
fs = a -> [String]
forall a. Reproducible a => a -> [String]
inFiles a
r [String] -> [String] -> [String]
forall a. [a] -> [a] -> [a]
++ [String]
outFs
  [ByteString]
cs <- (String -> IO ByteString) -> [String] -> IO [ByteString]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM String -> IO ByteString
hashFile [String]
fs
  let cs' :: [String]
cs' = (ByteString -> String) -> [ByteString] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> String
BS.unpack [ByteString]
cs
      s :: Reproduction a
s = String
-> [String]
-> Version
-> Maybe String
-> [String]
-> [String]
-> a
-> Reproduction a
forall a.
String
-> [String]
-> Version
-> Maybe String
-> [String]
-> [String]
-> a
-> Reproduction a
Reproduction String
pn [String]
as Version
version Maybe String
forall a. Maybe a
Nothing [String]
fs [String]
cs' a
r
  IO () -> IO ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> Reproduction a -> IO ()
forall a. ToJSON a => String -> a -> IO ()
encodeFile (String
bn String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
".elynx") (Reproduction a -> Reproduction a
forall a. Reproducible a => Reproduction a -> Reproduction a
setHash Reproduction a
s)