{-# LANGUAGE OverloadedStrings #-}
module Darcs.UI.Commands.Convert.Import ( convertImport ) where
import Darcs.Prelude hiding ( readFile, lex )
import Control.Applicative ((<|>),many)
import Control.Arrow ((&&&), second)
import Control.Monad (unless, void, when)
import Control.Monad.State.Strict (gets, modify)
import Control.Monad.Trans (liftIO)
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Attoparsec.ByteString.Char8 ((<?>))
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy.Char8 as BLC
import Data.IORef (modifyIORef, newIORef)
import Data.Maybe (fromMaybe)
import Data.Word (Word8)
import System.Directory (doesFileExist)
import System.FilePath.Posix ((</>))
import System.IO (stdin)
import Darcs.Patch.Depends ( getUncovered )
import Darcs.Patch.PatchInfoAnd ( n2pia )
import Darcs.Patch ( PrimOf, RepoPatch, move )
import Darcs.Patch.Apply ( ApplyState )
import Darcs.Patch.Named ( Named(..), infopatch )
import Darcs.Patch.Witnesses.Ordered
( FL(..)
, RL(..)
, (+<+)
, reverseFL
, reverseRL
)
import Darcs.Patch.Witnesses.Sealed ( Sealed(..), unFreeLeft )
import Darcs.Patch.Info ( PatchInfo, patchinfo )
import Darcs.Patch.Prim ( sortCoalesceFL )
import Darcs.Patch.RepoType ( IsRepoType(..) )
import Darcs.Repository
( EmptyRepository(..)
, Repository
, cleanRepository
, createPristineDirectoryTree
, createRepository
, finalizeRepositoryChanges
, readTentativeRepo
, repoCache
, repoLocation
, revertRepositoryChanges
, withUMaskFlag
)
import Darcs.Repository.Diff (treeDiff)
import Darcs.Repository.Flags (Compression(..), DiffAlgorithm(PatienceDiff))
import Darcs.Repository.Hashed (addToTentativeInventory)
import Darcs.Repository.Paths (pristineDirPath, tentativePristinePath)
import Darcs.Repository.Prefs (FileType(..))
import Darcs.Repository.State (readRecorded)
import Darcs.UI.Commands
( DarcsCommand(..)
, nodefaults
, withStdOpts
)
import Darcs.UI.Commands.Convert.Util
( Marks
, addMark
, emptyMarks
, getMark
, patchHash
, updatePending
)
import Darcs.UI.Commands.Util.Tree (treeHasDir, treeHasFile)
import Darcs.UI.Completion (noArgs)
import Darcs.UI.Flags
( DarcsFlag
, patchFormat
, patchIndexNo
, umask
, useCache
, withWorkingDir
)
import Darcs.UI.Options
( (?)
, (^)
, defaultFlags
, ocheck
, odesc
)
import qualified Darcs.UI.Options.All as O
import Darcs.Util.ByteString (decodeLocale, unpackPSFromUTF8)
import Darcs.Util.DateTime
( formatDateTime
, parseDateTime
, startOfTime
)
import Darcs.Util.Global (darcsdir)
import Darcs.Util.Hash (Hash(..), encodeBase16, sha256)
import Darcs.Util.Lock (withNewDirectory)
import Darcs.Util.Path
( AbsolutePath
, AnchoredPath(..)
, appendPath
, floatPath
, makeName
, parent
, darcsdirName
)
import Darcs.Util.Printer ( Doc, text )
import qualified Darcs.Util.Tree as T
import Darcs.Util.Tree
( Tree
, TreeItem(..)
, findTree
, listImmediate
, readBlob
, treeHash
)
import Darcs.Util.Tree.Hashed (darcsAddMissingHashes, hashedTreeIO)
import qualified Darcs.Util.Tree.Monad as TM
import Darcs.Util.Tree.Monad hiding (createDirectory, exists, rename)
convertImportHelp :: Doc
convertImportHelp = text $ unlines
[ "This command imports git repositories into new darcs repositories."
, "Further options are accepted (see `darcs help init`)."
, ""
, "To convert a git repo to a new darcs one you may run:"
, ""
, " $ (cd gitrepo && git fast-export --all -M) | darcs convert import darcsmirror"
, ""
, "WARNING: git repositories with branches will produce weird results,"
, " use at your own risks."
, ""
, "Incremental import with marksfiles is currently not supported."
]
convertImport :: DarcsCommand
convertImport = DarcsCommand
{ commandProgramName = "darcs"
, commandName = "import"
, commandHelp = convertImportHelp
, commandDescription = "Import from a git-fast-export stream into darcs"
, commandExtraArgs = -1
, commandExtraArgHelp = ["[<DIRECTORY>]"]
, commandCommand = fastImport
, commandPrereq = \_ -> return $ Right ()
, commandCompleteArgs = noArgs
, commandArgdefaults = nodefaults
, commandAdvancedOptions = odesc convertImportAdvancedOpts
, commandBasicOptions = odesc convertImportBasicOpts
, commandDefaults = defaultFlags convertImportOpts
, commandCheckOptions = ocheck convertImportOpts
}
where
convertImportBasicOpts
= O.newRepo
^ O.setScriptsExecutable
^ O.patchFormat
^ O.withWorkingDir
convertImportAdvancedOpts = O.patchIndexNo ^ O.umask
convertImportOpts = convertImportBasicOpts `withStdOpts` convertImportAdvancedOpts
type Marked = Maybe Int
type Branch = B.ByteString
type AuthorInfo = B.ByteString
type Message = B.ByteString
type Content = B.ByteString
type Tag = B.ByteString
data RefId = MarkId Int | HashId B.ByteString | Inline
deriving Show
data CopyRenameNames = Quoted B.ByteString B.ByteString
| Unquoted B.ByteString deriving Show
data Object = Blob (Maybe Int) Content
| Reset Branch (Maybe RefId)
| Commit Branch Marked AuthorInfo Message
| Tag Tag Int AuthorInfo Message
| Modify (Either Int Content) B.ByteString
| Gitlink B.ByteString
| Copy CopyRenameNames
| Rename CopyRenameNames
| Delete B.ByteString
| From Int
| Merge Int
| Progress B.ByteString
| End
deriving Show
type Ancestors = (Marked, [Int])
data State p where
Toplevel :: Marked -> Branch -> State p
InCommit :: Marked -> Ancestors -> Branch -> Tree IO -> RL (PrimOf p) cX cY -> PatchInfo -> State p
Done :: State p
instance Show (State p) where
show Toplevel {} = "Toplevel"
show InCommit {} = "InCommit"
show Done = "Done"
fastImport :: (AbsolutePath, AbsolutePath) -> [DarcsFlag] -> [String] -> IO ()
fastImport _ opts [outrepo] =
withUMaskFlag (umask ? opts) $ withNewDirectory outrepo $ do
EmptyRepository _repo <- createRepository
(patchFormat ? opts)
(withWorkingDir ? opts)
(patchIndexNo ? opts)
(useCache ? opts)
_repo <- revertRepositoryChanges _repo (updatePending opts)
marks <- fastImport' _repo emptyMarks
_ <- finalizeRepositoryChanges _repo (updatePending opts) GzipCompression
cleanRepository _repo
createPristineDirectoryTree _repo "." (withWorkingDir ? opts)
return marks
fastImport _ _ _ = fail "I need exactly one output repository."
fastImport' :: forall rt p r u . (IsRepoType rt, RepoPatch p, ApplyState p ~ Tree) =>
Repository rt p r u r -> Marks -> IO ()
fastImport' repo marks = do
pristine <- readRecorded repo
marksref <- newIORef marks
let initial = Toplevel Nothing $ BC.pack "refs/branches/master"
go :: State p -> B.ByteString -> TreeIO ()
go state rest = do (rest', item) <- parseObject rest
state' <- process state item
case state' of
Done -> return ()
_ -> go state' rest'
markpath :: Int -> AnchoredPath
markpath n = floatPath (darcsdir </> "marks")
`appendPath` (either error id $ makeName $ show (n `div` 1000))
`appendPath` (either error id $ makeName $ show (n `mod` 1000))
makeinfo author message tag = do
let (name, log) = case unpackPSFromUTF8 message of
"" -> ("Unnamed patch", [])
msg -> (head &&& tail) . lines $ msg
(author'', date'') = span (/='>') $ unpackPSFromUTF8 author
date' = dropWhile (`notElem` ("0123456789" :: String)) date''
author' = author'' ++ ">"
date = formatDateTime "%Y%m%d%H%M%S" $ fromMaybe startOfTime (parseDateTime "%s %z" date')
liftIO $ patchinfo date (if tag then "TAG " ++ name else name) author' log
addtag author msg =
do info_ <- makeinfo author msg True
gotany <- liftIO $ doesFileExist tentativePristinePath
deps <- if gotany then liftIO $
getUncovered `fmap`
readTentativeRepo repo (repoLocation repo)
else return []
let patch :: Named p wA wA
patch = NamedP info_ deps NilFL
liftIO $ addToTentativeInventory (repoCache repo) GzipCompression (n2pia patch)
updateHashes = do
let nodarcs = \(AnchoredPath (x:_)) _ -> x /= darcsdirName
hashblobs (File blob@(T.Blob con NoHash)) =
do hash <- sha256 `fmap` readBlob blob
return $ File (T.Blob con hash)
hashblobs x = return x
tree' <- liftIO . T.partiallyUpdateTree hashblobs nodarcs =<< gets tree
modify $ \s -> s { tree = tree' }
return $ T.filter nodarcs tree'
deleteEmptyParents fp =
case parent fp of
Nothing -> return ()
Just directParent -> do
parentTree <- flip findTree directParent <$> gets tree
case (null . listImmediate) <$> parentTree of
Just True -> do TM.unlink directParent
deleteEmptyParents directParent
_ -> return ()
diffCurrent :: State p -> TreeIO (State p)
diffCurrent (InCommit mark ancestors branch start ps info_) = do
current <- updateHashes
Sealed diff <- unFreeLeft `fmap`
liftIO (treeDiff PatienceDiff (const TextFile) start current)
let newps = ps +<+ reverseFL diff
return $ InCommit mark ancestors branch current newps info_
diffCurrent _ = error "This is never valid outside of a commit."
process :: State p -> Object -> TreeIO (State p)
process s (Progress p) = do
liftIO $ putStrLn ("progress " ++ decodeLocale p)
return s
process (Toplevel _ _) End = do
tree' <- (liftIO . darcsAddMissingHashes) =<< updateHashes
modify $ \s -> s { tree = tree' }
let root = encodeBase16 $ treeHash tree'
liftIO $ do
putStrLn "\\o/ It seems we survived. Enjoy your new repo."
B.writeFile tentativePristinePath $ BC.concat [BC.pack "pristine:", root]
return Done
process (Toplevel n b) (Tag tag what author msg) = do
if Just what == n
then addtag author msg
else liftIO $ putStrLn $
"WARNING: Ignoring out-of-order tag " ++ decodeLocale tag
return (Toplevel n b)
process (Toplevel n _) (Reset branch from) =
do case from of
(Just (MarkId k)) | Just k == n ->
addtag (BC.pack "Anonymous Tagger <> 0 +0000") branch
_ -> liftIO $ putStrLn $ "WARNING: Ignoring out-of-order tag " ++
decodeLocale branch
return $ Toplevel n branch
process (Toplevel n b) (Blob (Just m) bits) = do
TM.writeFile (markpath m) (BLC.fromChunks [bits])
return $ Toplevel n b
process x (Gitlink link) = do
liftIO $ putStrLn $ "WARNING: Ignoring gitlink " ++ decodeLocale link
return x
process (Toplevel previous pbranch) (Commit branch mark author message) = do
when (pbranch /= branch) $ do
liftIO $ putStrLn ("Tagging branch: " ++ decodeLocale pbranch)
addtag author pbranch
info_ <- makeinfo author message False
startstate <- updateHashes
return $ InCommit mark (previous, []) branch startstate NilRL info_
process s@InCommit {} (Modify (Left m) path) = do
TM.copy (markpath m) (decodePath path)
diffCurrent s
process s@InCommit {} (Modify (Right bits) path) = do
TM.writeFile (decodePath path) (BLC.fromChunks [bits])
diffCurrent s
process s@InCommit {} (Delete path) = do
let floatedPath = decodePath path
TM.unlink floatedPath
deleteEmptyParents floatedPath
diffCurrent s
process (InCommit mark (prev, current) branch start ps info_) (From from) =
return $ InCommit mark (prev, from:current) branch start ps info_
process (InCommit mark (prev, current) branch start ps info_) (Merge from) =
return $ InCommit mark (prev, from:current) branch start ps info_
process s@InCommit {} (Copy names) = do
(from, to) <- extractNames names
TM.copy (decodePath from) (decodePath to)
diffCurrent s
process s@(InCommit mark ancestors branch start _ info_) (Rename names) = do
(from, to) <- extractNames names
let uFrom = decodePath from
uTo = decodePath to
case parent uTo of
Nothing ->
return ()
Just parentDir -> do
targetDirExists <- liftIO $ treeHasDir start uTo
targetFileExists <- liftIO $ treeHasFile start uTo
parentDirExists <-
liftIO $ treeHasDir start parentDir
if targetDirExists || targetFileExists
then TM.unlink uTo
else unless parentDirExists $ TM.createDirectory parentDir
(InCommit _ _ _ _ newPs _) <- diffCurrent s
TM.rename uFrom uTo
let ps' = newPs :<: move uFrom uTo
current <- updateHashes
deleteEmptyParents uFrom
diffCurrent (InCommit mark ancestors branch current ps' info_)
process (InCommit mark ancestors branch _ ps info_) x = do
case ancestors of
(_, []) -> return ()
(Just n, list)
| n `elem` list -> return ()
| otherwise -> liftIO $ putStrLn $
"WARNING: Linearising non-linear ancestry:" ++
" currently at " ++ show n ++ ", ancestors " ++ show list
(Nothing, list) ->
liftIO $ putStrLn $ "WARNING: Linearising non-linear ancestry " ++ show list
(prims :: FL (PrimOf p) cX cY) <- return $ sortCoalesceFL $ reverseRL ps
let patch :: Named p cX cY
patch = infopatch info_ prims
liftIO $ addToTentativeInventory (repoCache repo)
GzipCompression (n2pia patch)
case mark of
Nothing -> return ()
Just n -> case getMark marks n of
Nothing -> liftIO $ modifyIORef marksref $ \m -> addMark m n (patchHash $ n2pia patch)
Just n' -> fail $ "FATAL: Mark already exists: " ++ decodeLocale n'
process (Toplevel mark branch) x
process state obj = do
liftIO $ print obj
fail $ "Unexpected object in state " ++ show state
extractNames :: CopyRenameNames
-> TreeIO (BC.ByteString, BC.ByteString)
extractNames names = case names of
Quoted f t -> return (f, t)
Unquoted uqNames -> do
let spaceIndices = BC.elemIndices ' ' uqNames
splitStr = second (BC.drop 1) . flip BC.splitAt uqNames
spaceComponents = reverse $ map splitStr spaceIndices
componentCount = length spaceComponents
if componentCount == 1
then return $ head spaceComponents
else do
let dieMessage = unwords
[ "Couldn't determine move/rename"
, "source/destination filenames, with the"
, "data produced by this (old) version of"
, "git, since it uses unquoted, but"
, "special-character-containing paths."
]
lPathExists (l,_) =
TM.fileExists $ decodePath l
finder [] = error dieMessage
finder (x : rest) = do
xExists <- lPathExists x
if xExists then return x else finder rest
finder spaceComponents
void $ hashedTreeIO (go initial B.empty) pristine pristineDirPath
parseObject :: BC.ByteString -> TreeIO ( BC.ByteString, Object )
parseObject = next' mbObject
where mbObject = A.parse p_maybeObject
p_maybeObject = Just `fmap` p_object
<|> (A.endOfInput >> return Nothing)
lex p = p >>= \x -> A.skipSpace >> return x
lexString s = A.string (BC.pack s) >> A.skipSpace
line = lex $ A.takeWhile (/='\n')
optional p = Just `fmap` p <|> return Nothing
p_object = p_blob
<|> p_reset
<|> p_commit
<|> p_tag
<|> p_modify
<|> p_rename
<|> p_copy
<|> p_from
<|> p_merge
<|> p_delete
<|> (lexString "progress" >> Progress `fmap` line)
p_author name = lexString name >> line
p_reset = do lexString "reset"
branch <- line
refid <- optional $ lexString "from" >> p_refid
return $ Reset branch refid
p_commit = do lexString "commit"
branch <- line
mark <- optional p_mark
_ <- optional $ p_author "author"
committer <- p_author "committer"
message <- p_data
return $ Commit branch mark committer message
p_tag = do _ <- lexString "tag"
tag <- line
lexString "from"
mark <- p_marked
author <- p_author "tagger"
message <- p_data
return $ Tag tag mark author message
p_blob = do lexString "blob"
mark <- optional p_mark
Blob mark `fmap` p_data
<?> "p_blob"
p_mark = do lexString "mark"
p_marked
<?> "p_mark"
p_refid = MarkId `fmap` p_marked
<|> (lexString "inline" >> return Inline)
<|> HashId `fmap` p_hash
p_data = do lexString "data"
len <- A.decimal
_ <- A.char '\n'
lex $ A.take len
<?> "p_data"
p_marked = lex $ A.char ':' >> A.decimal
p_hash = lex $ A.takeWhile1 (A.inClass "0123456789abcdefABCDEF")
p_from = lexString "from" >> From `fmap` p_marked
p_merge = lexString "merge" >> Merge `fmap` p_marked
p_delete = lexString "D" >> Delete `fmap` p_maybeQuotedName
p_rename = do lexString "R"
names <- p_maybeQuotedCopyRenameNames
return $ Rename names
p_copy = do lexString "C"
names <- p_maybeQuotedCopyRenameNames
return $ Copy names
p_modify = do lexString "M"
mode <- lex $ A.takeWhile (A.inClass "01234567890")
mark <- p_refid
path <- p_maybeQuotedName
case mark of
HashId hash | mode == BC.pack "160000" -> return $ Gitlink hash
| otherwise -> fail ":(("
MarkId n -> return $ Modify (Left n) path
Inline -> do bits <- p_data
return $ Modify (Right bits) path
p_maybeQuotedCopyRenameNames =
p_lexTwoQuotedNames <|> Unquoted `fmap` line
p_lexTwoQuotedNames = do
n1 <- lex p_quotedName
n2 <- lex p_quotedName
return $ Quoted n1 n2
p_maybeQuotedName = lex (p_quotedName <|> line)
p_quotedName = do
_ <- A.char '"'
bytes <- many (p_escaped <|> p_unescaped)
_ <- A.char '"'
return $ B.concat bytes
p_unescaped = A.takeWhile1 (\c->c/='"' && c/='\\')
p_escaped = do
_ <- A.char '\\'
p_escaped_octal <|> p_escaped_char
p_escaped_octal = do
let octals :: [Char]
octals = "01234567"
s <- A.takeWhile1 (`elem` octals)
let x :: Word8
x = read ("0o" ++ BC.unpack s)
return $ B.singleton $ fromIntegral x
p_escaped_char =
fmap BC.singleton $
'\r' <$ A.char 'r' <|> '\n' <$ A.char 'n' <|> A.char '"' <|> A.char '\\'
next' :: (B.ByteString -> A.Result (Maybe Object)) -> B.ByteString -> TreeIO (B.ByteString, Object)
next' parser rest =
do chunk <- if B.null rest then liftIO $ B.hGet stdin (64 * 1024)
else return rest
next_chunk parser chunk
next_chunk :: (B.ByteString -> A.Result (Maybe Object)) -> B.ByteString -> TreeIO (B.ByteString, Object)
next_chunk parser chunk =
case parser chunk of
A.Done rest result -> return (rest, maybe End id result)
A.Partial cont -> next' cont B.empty
A.Fail _ ctx err -> do
liftIO $ putStrLn $ "=== chunk ===\n" ++ decodeLocale chunk ++ "\n=== end chunk ===="
fail $ "Error parsing stream. " ++ err ++ "\nContext: " ++ show ctx
decodePath :: BC.ByteString -> AnchoredPath
decodePath = floatPath . decodeLocale