Add --archive option to init to sync using git archive

This significantly reduces sync time and used brandwidth as only
a tar of each project's revision is checked out, but git is not
accessible from projects anymore.

This is relevant when git is not needed in projects but sync
speed/brandwidth may be important like on CI servers when building
several versions from scratch regularly for example.

Archive is not supported over http/https.

Change-Id: I48c3c7de2cd5a1faec33e295fcdafbc7807d0e4d
Signed-off-by: Julien Campergue <julien.campergue@parrot.com>
This commit is contained in:
Julien Campergue 2013-10-16 11:02:35 +02:00 committed by David Pursehouse
parent 61df418c59
commit 335f5ef4ad
5 changed files with 98 additions and 5 deletions

View File

@ -329,6 +329,10 @@ class XmlManifest(object):
def IsMirror(self):
return self.manifestProject.config.GetBoolean('repo.mirror')
@property
def IsArchive(self):
return self.manifestProject.config.GetBoolean('repo.archive')
def _Unload(self):
self._loaded = False
self._projects = {}

View File

@ -23,6 +23,7 @@ import shutil
import stat
import subprocess
import sys
import tarfile
import tempfile
import time
@ -982,15 +983,62 @@ class Project(object):
## Sync ##
def _ExtractArchive(self, tarpath, path=None):
"""Extract the given tar on its current location
Args:
- tarpath: The path to the actual tar file
"""
try:
with tarfile.open(tarpath, 'r') as tar:
tar.extractall(path=path)
return True
except (IOError, tarfile.TarError) as e:
print("error: Cannot extract archive %s: "
"%s" % (tarpath, str(e)), file=sys.stderr)
return False
def Sync_NetworkHalf(self,
quiet=False,
is_new=None,
current_branch_only=False,
clone_bundle=True,
no_tags=False):
no_tags=False,
archive=False):
"""Perform only the network IO portion of the sync process.
Local working directory/branch state is not affected.
"""
if archive and not isinstance(self, MetaProject):
if self.remote.url.startswith(('http://', 'https://')):
print("error: %s: Cannot fetch archives from http/https "
"remotes." % self.name, file=sys.stderr)
return False
name = self.relpath.replace('\\', '/')
name = name.replace('/', '_')
tarpath = '%s.tar' % name
topdir = self.manifest.topdir
try:
self._FetchArchive(tarpath, cwd=topdir)
except GitError as e:
print('error: %s' % str(e), file=sys.stderr)
return False
# From now on, we only need absolute tarpath
tarpath = os.path.join(topdir, tarpath)
if not self._ExtractArchive(tarpath, path=topdir):
return False
try:
os.remove(tarpath)
except OSError as e:
print("warn: Cannot remove archive %s: "
"%s" % (tarpath, str(e)), file=sys.stderr)
self._CopyFiles()
return True
if is_new is None:
is_new = not self.Exists
if is_new:
@ -1573,6 +1621,19 @@ class Project(object):
## Direct Git Commands ##
def _FetchArchive(self, tarpath, cwd=None):
cmd = ['archive', '-v', '-o', tarpath]
cmd.append('--remote=%s' % self.remote.url)
cmd.append('--prefix=%s/' % self.relpath)
cmd.append(self.revisionExpr)
command = GitCommand(self, cmd, cwd=cwd,
capture_stdout=True,
capture_stderr=True)
if command.Wait() != 0:
raise GitError('git archive %s: %s' % (self.name, command.stderr))
def _RemoteFetch(self, name=None,
current_branch_only=False,
initial=False,

4
repo
View File

@ -180,6 +180,10 @@ group.add_option('--reference',
group.add_option('--depth', type='int', default=None,
dest='depth',
help='create a shallow clone with given depth; see git clone')
group.add_option('--archive',
dest='archive', action='store_true',
help='checkout an archive instead of a git repository for '
'each project. See git archive.')
group.add_option('-g', '--groups',
dest='groups', default='default',
help='restrict manifest projects to ones with specified '

View File

@ -99,6 +99,10 @@ to update the working directory files.
g.add_option('--depth', type='int', default=None,
dest='depth',
help='create a shallow clone with given depth; see git clone')
g.add_option('--archive',
dest='archive', action='store_true',
help='checkout an archive instead of a git repository for '
'each project. See git archive.')
g.add_option('-g', '--groups',
dest='groups', default='default',
help='restrict manifest projects to ones with specified '
@ -198,6 +202,16 @@ to update the working directory files.
if opt.reference:
m.config.SetString('repo.reference', opt.reference)
if opt.archive:
if is_new:
m.config.SetString('repo.archive', 'true')
else:
print('fatal: --archive is only supported when initializing a new '
'workspace.', file=sys.stderr)
print('Either delete the .repo folder in this workspace, or initialize '
'in another location.', file=sys.stderr)
sys.exit(1)
if opt.mirror:
if is_new:
m.config.SetString('repo.mirror', 'true')
@ -366,6 +380,13 @@ to update the working directory files.
if opt.reference:
opt.reference = os.path.expanduser(opt.reference)
# Check this here, else manifest will be tagged "not new" and init won't be
# possible anymore without removing the .repo/manifests directory.
if opt.archive and opt.mirror:
print('fatal: --mirror and --archive cannot be used together.',
file=sys.stderr)
sys.exit(1)
self._SyncManifest(opt)
self._LinkManifest(opt.manifest_name)

View File

@ -253,7 +253,7 @@ later is required to fix a server side protocol bug.
quiet=opt.quiet,
current_branch_only=opt.current_branch_only,
clone_bundle=not opt.no_clone_bundle,
no_tags=opt.no_tags)
no_tags=opt.no_tags, archive=self.manifest.IsArchive)
self._fetch_times.Set(project, time.time() - start)
# Lock around all the rest of the code, since printing, updating a set
@ -294,7 +294,8 @@ later is required to fix a server side protocol bug.
quiet=opt.quiet,
current_branch_only=opt.current_branch_only,
clone_bundle=not opt.no_clone_bundle,
no_tags=opt.no_tags):
no_tags=opt.no_tags,
archive=self.manifest.IsArchive):
fetched.add(project.gitdir)
else:
print('error: Cannot fetch %s' % project.name, file=sys.stderr)
@ -338,7 +339,9 @@ later is required to fix a server side protocol bug.
pm.end()
self._fetch_times.Save()
self._GCProjects(projects)
if not self.manifest.IsArchive:
self._GCProjects(projects)
return fetched
def _GCProjects(self, projects):
@ -641,7 +644,7 @@ later is required to fix a server side protocol bug.
previously_missing_set = missing_set
fetched.update(self._Fetch(missing, opt))
if self.manifest.IsMirror:
if self.manifest.IsMirror or self.manifest.IsArchive:
# bail out now, we have no working tree
return