{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, KindSignatures, TemplateHaskell #-} module Distribution.Skete.Haskell.Package ( createIndexTar , UpdateConfig(..), HasUpdateConfig(..) , PackageEvent(..) , getIndexRemaining, streamIndex, elideSeen , applyPackageEvent , tarPath2PackageVersion, getPackageTar, getCabalRevision ) where import Prelude hiding (lookup) import Data.Foldable import Data.Int import qualified Data.List as List import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad.Trans import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C import qualified Data.Conduit.List as CL import Data.Either (partitionEithers) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as TE import Data.Time import Network.Wreq import Control.Lens import System.FilePath import Control.Monad.Catch (MonadThrow, MonadCatch) import qualified Data.ListTrie.Map.Ord as LT import qualified Data.Map as Map import Data.Sequence (Seq) import qualified Control.Monad.Catch as E import qualified Control.Exception as EB import Control.Monad.Log (MonadLog) import qualified Control.Monad.Log as Log import Data.Time.Clock.POSIX import Control.Monad.Reader.Class import qualified System.IO.Error as IOE import Distribution.PackageDescription (GenericPackageDescription(packageDescription), PackageDescription(customFieldsPD)) import Distribution.PackageDescription.Parsec (parseGenericPackageDescription, runParseResult, ParseResult) import qualified System.AtomicWrite.Writer.Text as TIO import System.Directory (createDirectoryIfMissing) import Safe import Distribution.Skete.Storage.Interface import Distribution.Skete.DownloadUtils import Distribution.Skete.TarUtils data DatabaseCorruption = DatabaseCorruption deriving (Show) instance EB.Exception DatabaseCorruption tshow :: Show s => s -> Text tshow = T.pack . show createIndexTar :: [(FilePath, BSL.ByteString)] -> ([String], BSL.ByteString) createIndexTar entries = Tar.write <$> entries' where entries' = partitionEithers [flip Tar.fileEntry file <$> tarPath | (path, file) <- entries, let tarPath = Tar.toTarPath False path] data UpdateConfig = UpdateConfig { _hackageSchemeHost :: Text , _targetPackageSet :: PackageSet , _tarballCacheDir :: Maybe FilePath } deriving (Read, Show, Eq, Ord) makeClassy ''UpdateConfig data PackageEvent = PackageRevision { peWhen :: UTCTime , pePackageVersion :: PackageVersion , peCabal :: BS.ByteString } | PackagePreferred { peWhen :: UTCTime , pePackage :: Package , pePreferred :: BS.ByteString } deriving (Ord, Eq, Show, Read) {- | Walk the entries in the (read) tar passed in, and emit package events based on the contents that represent them. This is done in tar order, which happens to, importantly, be temporal order. -} streamIndex :: (MonadThrow m, MonadLog env m) => Tar.Entries Tar.FormatError -> C.Source m PackageEvent streamIndex Tar.Done = lift . Log.info $ "Current 01-index.tar completed" streamIndex (Tar.Fail e) = do lift . Log.error . mconcat $ ["Tar failed with: ", tshow e] E.throwM e streamIndex (Tar.Next ent t) = do case Tar.entryContent ent of Tar.NormalFile cabalFl _ | (takeExtension . Tar.entryPath $ ent) == ".cabal" -> do lift . Log.debug . mconcat $ ["Processing cabal entry: ", tshow ent] case tarPath2PackageVersion . Tar.entryPath $ ent of Nothing -> do lift . Log.error . mconcat $ [ "Failed to parse PackageVersion of '" , T.pack . Tar.entryPath $ ent, "'!"] E.throwM . EB.ErrorCall $ "PackageVersion parse failed!" Just pv -> do lift . Log.info . mconcat $ ["Got cabal entry for: ", tshow pv] C.yield $ PackageRevision (posixSecondsToUTCTime . realToFrac . Tar.entryTime $ ent) pv . BSL.toStrict $ cabalFl Tar.NormalFile prefVersion _ | (takeFileName . Tar.entryPath $ ent) == "preferred-versions" -> do lift . Log.debug . mconcat $ ["Processing preferred-versions entry: ", tshow ent] case fmap dropTrailingPathSeparator . splitPath . Tar.entryPath $ ent of [pn, "preferred-versions"] -> do lift . Log.info . mconcat $ ["Got preferred-versions entry for: ", T.pack pn] C.yield $ PackagePreferred (posixSecondsToUTCTime . realToFrac . Tar.entryTime $ ent) (T.pack pn) . BSL.toStrict $ prefVersion _ -> do lift . Log.error . mconcat $ [ "Failed to parse Package of '" , T.pack . Tar.entryPath $ ent, "'!" ] E.throwM . EB.ErrorCall $ "Package parse failed!" Tar.NormalFile _ _ | (takeFileName . Tar.entryPath $ ent) == "package.json" -> do lift . Log.debug . mconcat $ ["Ignoring package.json ", T.pack . Tar.entryPath $ ent] return () _ -> do lift . Log.warning . mconcat $ ["Unknown tar entry: ", tshow ent] streamIndex t {- | Parse a path in the 01-index.tar into a PackageVersion, if possible. -} tarPath2PackageVersion :: FilePath -> Maybe PackageVersion tarPath2PackageVersion path = case map init . splitPath . fst . splitFileName $ path of [pkgName, pkgVerStr] -> fmap (PV (T.pack pkgName)) . readMay $ pkgVerStr _ -> Nothing parseCabalByteString :: BS.ByteString -> ParseResult GenericPackageDescription parseCabalByteString bs = parseGenericPackageDescription bs elideSeen :: forall t m env d r c . ( MonadTrans t, MonadThrow (t m), MonadLog env (t m), SketeStorage d m r , HasUpdateConfig c, MonadReader c (t m)) => C.Conduit PackageEvent (t m) PackageEvent elideSeen = flip CL.concatMapAccumM mempty $ skipEvents where skipEvents :: PackageEvent -> Seq PackageEvent -> t m (Seq PackageEvent, [PackageEvent]) skipEvents (pe@(PackageRevision _ pv cblFl)) delayedEvents = do ps <- view targetPackageSet mcabal <- lift $ labelDataLookup ps pv "cabal" case ( (runParseResult . parseCabalByteString) <$> mcabal , runParseResult . parseCabalByteString $ cblFl) of (Just (_, Left (_, perr)), _) -> do Log.error . mconcat $ ["Failed to parse old cabal file for ", tshow pv, ": ", tshow perr] E.throwM DatabaseCorruption (_, (_, Left (_, perr))) -> do Log.warning . mconcat $ ["Failed to parse new cabal file for ", tshow pv, ": ", tshow perr] return (delayedEvents, []) (Nothing, (_, Right _)) -> do Log.debug . mconcat $ ["cabal for ", tshow pv, " not found, assuming package is new"] return (mempty, toList $ delayedEvents |> pe) (Just (_, Right oldCabal), (_, Right newCabal)) -> do case ((getCabalRevision oldCabal), (getCabalRevision newCabal)) of (Nothing, _) -> do Log.error . mconcat $ ["Could not parse x-revision field of old cabal file for ", tshow pv] E.throwM . EB.AssertionFailed $ "x-revision field of old "<>show pv<>" cabal not parsable" (_, Nothing) -> do Log.warning . mconcat $ ["Could not parse x-revision field of new cabal file for ", tshow pv] return (delayedEvents, []) (Just oldRev, Just newRev) -> case oldRev `compare` newRev of LT -> do Log.debug . mconcat $ ["Old revision of ", tshow pv, " is less then the new cabal, passing it"] return (mempty, toList $ delayedEvents |> pe) EQ -> do Log.debug . mconcat $ ["Old revision of ", tshow pv, " is the same as new cabal, eliding it"] return (mempty, []) GT -> do Log.debug . mconcat $ ["Old revision of ", tshow pv, " is newer then the new cabal, eliding it"] return (mempty, []) skipEvents (pp@(PackagePreferred {})) delayedEvents = do return (delayedEvents |> pp, []) getCabalRevision :: GenericPackageDescription -> Maybe Integer getCabalRevision = maybe (Just 0) readMay . List.lookup "x-revision" . customFieldsPD . packageDescription {- | Process a PackageRevision, which represents a new .cabal file for a package. This adds it, and if a cabal file for it isn't already found in the repository, the source for it, to the skete repository. -} applyPackageEvent :: ( MonadTrans t, MonadLog env (t m), MonadCatch (t m) , SketeStorage d m r , HasUpdateConfig c, MonadReader c (t m)) => PackageEvent -> t m () applyPackageEvent (PackageRevision occured pv cbl) = do ps <- view targetPackageSet Log.info . mconcat $ ["Inserting new package-version ", tshow pv] mr <- lift $ lookup pv case mr of Nothing -> do Log.debug . mconcat $ ["Adding tar contents for ", tshow pv] tar <- getPackageTar pv case explodeTar (show pv) tar of Left tarErr -> do Log.warning . mconcat $ ["Tar corruption found for ", tshow pv, " of type ", tshow tarErr] Right tarConts -> do r <- lift $ add' pv () occured . LT.filterWithKey (\k _ -> not . any (flip elem [".git", ".", ".."]) $ k) $ tarConts lift $ label' r (Map.singleton "cabal" cbl) ps occured Just r -> do Log.debug . mconcat $ ["Updating cabal for ", tshow $ pv] lift $ label' r (Map.singleton "cabal" $ cbl) ps occured Log.info . mconcat $ ["Updated ", tshow pv] applyPackageEvent (PackagePreferred occured pn pref) = do ps <- view targetPackageSet Log.info . mconcat $ ["Inserting new preferred-versions for ", pn] mOldPreferred <- lift $ packageDataLookup ps pn ("preferred-versions") case mOldPreferred of Nothing -> do Log.debug . mconcat $ ["Setting initial preferred-versions for ", pn] lift $ labelPackage' ps pn (Map.singleton "preferred-versions" $ pref) occured Just oldPref | oldPref == pref -> do Log.debug . mconcat $ ["No change in preferred-versions for ", pn] Just _ -> do Log.debug . mconcat $ ["Setting new preferred-versions for ", pn] lift $ labelPackage' ps pn (Map.singleton "preferred-versions" $ pref) occured Log.info . mconcat $ ["Set preferreds for ", pn] {- | Figure out where we should continue reading the hackage 01-index.tar There are two ways we do this: - First, if we have a record of where we left off, we trust that and pick up there. - Second, if we don't, we download the current index, and read it to find the point where its current set of current revisions best matches ours. Then we return the rest of the current index. In either case, we might get events we've already seen, so we should skip those when we use the output of this function, which can be done by 'elideSeen'. When the function we've been passed returns successfully, we assume that all the remaining index has been processed, and update the stored offset. -} getIndexRemaining :: forall (t :: (* -> *) -> * -> *) m env c a . ( MonadIO (t m), MonadCatch (t m), MonadLog env (t m) , HasUpdateConfig c, MonadReader c (t m)) => (Tar.Entries Tar.FormatError -> t m a) -> t m a getIndexRemaining act = do indexUrl <- (`T.append` "/01-index.tar") <$> view hackageSchemeHost Log.info . mconcat $ ["Following updates from ", indexUrl] mOldOff <- followFromExisting Log.info $ maybe "Starting from the start of the index" (\i -> "Trying to read the index from " `T.append` (T.pack . show $ i)) mOldOff let opts = maybe defaults (\offset -> defaults & header "Range" .~ [TE.encodeUtf8 . T.pack $ "bytes="++show offset++"-"]) mOldOff {- r <- liftIO $ getWith opts indexUrl let tarTail = GZip.decompress $ r ^. responseBody newOffset <- case mOldOff of -- We asked for a range, and actually got a partial response back, -- so we're safe to start from here. Just offset | r ^. responseStatus.statusCode == 206 -> do Log.info "Successful restart from mid-index" return $ offset+BSL.length tarTail -- If either of those conditions don't hold, we look at every event again. _ -> do Log.info "Replaying full index" return $ BSL.length tarTail liftIO $ BSL.writeFile "01-index.tar" tarTail -} tarTail <- liftIO $ BSL.readFile "01-index.tar" let newOffset = BSL.length tarTail b <- act . Tar.read $ tarTail liftIO . createDirectoryIfMissing False $ storageDir liftIO . TIO.atomicWriteFile offsetStorage . tshow $ newOffset return b where followFromExisting :: t m (Maybe Int64) followFromExisting = E.handleJust (\e -> if IOE.isDoesNotExistErrorType . IOE.ioeGetErrorType $ e then Just Nothing else Nothing) return $ do Log.debug "Trying to read offset from storage." liftIO $ (readMay . T.unpack) <$> TIO.readFile offsetStorage storageDir :: FilePath storageDir = ".git/skete-haskell" offsetStorage :: FilePath offsetStorage = storageDir "hackage-index-offset" {- | Aquire the tarball for this package, either from our cache or off the network. -} getPackageTar :: (MonadIO m, MonadCatch m, MonadLog env m, HasUpdateConfig c, MonadReader c m) => PackageVersion -> m BSL.ByteString getPackageTar pkg = retryingHTTP 3 $ do mCacheDir <- view tarballCacheDir let uri = T.intercalate "/" $ [ "https://hackage.haskell.org/package" , tshow pkg, tshow pkg <> ".tar.gz"] case mCacheDir of Just cacheDir -> cachingGet (cacheDir show pkg) uri Nothing -> cachelessGet uri