mirror of
https://gerrit.googlesource.com/git-repo
synced 2024-12-21 07:16:21 +00:00
Add support for partial clone.
A new option, --partial-clone, is added to 'repo init'. It tells repo to use git's partial clone functionality, which reduces disk and bandwidth usage when downloading by initially omitting blob downloads. Unlike restricting the clone depth, the user still has full access to the change history, etc., because the objects are downloaded on demand. Change-Id: I60326744875eac16521a007bd7d5481112a98749 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/229532 Reviewed-by: Mike Frysinger <vapier@google.com> Tested-by: Xin Li <delphij@google.com>
This commit is contained in:
parent
87fb5a1894
commit
745be2ede1
@ -98,13 +98,15 @@ class _GitCall(object):
|
|||||||
return fun
|
return fun
|
||||||
git = _GitCall()
|
git = _GitCall()
|
||||||
|
|
||||||
def git_require(min_version, fail=False, msg=''):
  """Check whether the installed git is at least `min_version`.

  Args:
    min_version: Version tuple that is compared against git.version_tuple().
    fail: If true, print a fatal message to stderr and exit with status 1
        when the requirement is not met, instead of returning False.
    msg: Optional description of the feature that needs this version; when
        non-empty it is appended to the fatal message as ' for <msg>'.

  Returns:
    True if the installed git satisfies `min_version`; False otherwise (only
    reachable when `fail` is false).
  """
  git_version = git.version_tuple()
  if min_version <= git_version:
    return True
  if fail:
    need = '.'.join(map(str, min_version))
    if msg:
      msg = ' for ' + msg
    print('fatal: git %s or later required%s' % (need, msg), file=sys.stderr)
    sys.exit(1)
  return False
|
||||||
|
|
||||||
|
@ -413,6 +413,12 @@ class XmlManifest(object):
|
|||||||
self._Load()
|
self._Load()
|
||||||
return self._manifest_server
|
return self._manifest_server
|
||||||
|
|
||||||
|
@property
def CloneFilter(self):
  """Filter spec to use for partial clones, or None if they are disabled.

  The 'repo.clonefilter' string from the manifest project's config is only
  returned when the 'repo.partialclone' boolean in that config is true.
  """
  if self.manifestProject.config.GetBoolean('repo.partialclone'):
    return self.manifestProject.config.GetString('repo.clonefilter')
  return None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def IsMirror(self):
|
def IsMirror(self):
|
||||||
return self.manifestProject.config.GetBoolean('repo.mirror')
|
return self.manifestProject.config.GetBoolean('repo.mirror')
|
||||||
|
18
project.py
18
project.py
@ -1226,7 +1226,8 @@ class Project(object):
|
|||||||
archive=False,
|
archive=False,
|
||||||
optimized_fetch=False,
|
optimized_fetch=False,
|
||||||
prune=False,
|
prune=False,
|
||||||
submodules=False):
|
submodules=False,
|
||||||
|
clone_filter=None):
|
||||||
"""Perform only the network IO portion of the sync process.
|
"""Perform only the network IO portion of the sync process.
|
||||||
Local working directory/branch state is not affected.
|
Local working directory/branch state is not affected.
|
||||||
"""
|
"""
|
||||||
@ -1309,7 +1310,8 @@ class Project(object):
|
|||||||
not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir,
|
not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir,
|
||||||
current_branch_only=current_branch_only,
|
current_branch_only=current_branch_only,
|
||||||
no_tags=no_tags, prune=prune, depth=depth,
|
no_tags=no_tags, prune=prune, depth=depth,
|
||||||
submodules=submodules, force_sync=force_sync)):
|
submodules=submodules, force_sync=force_sync,
|
||||||
|
clone_filter=clone_filter)):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
mp = self.manifest.manifestProject
|
mp = self.manifest.manifestProject
|
||||||
@ -1959,7 +1961,8 @@ class Project(object):
|
|||||||
prune=False,
|
prune=False,
|
||||||
depth=None,
|
depth=None,
|
||||||
submodules=False,
|
submodules=False,
|
||||||
force_sync=False):
|
force_sync=False,
|
||||||
|
clone_filter=None):
|
||||||
|
|
||||||
is_sha1 = False
|
is_sha1 = False
|
||||||
tag_name = None
|
tag_name = None
|
||||||
@ -2050,6 +2053,11 @@ class Project(object):
|
|||||||
|
|
||||||
cmd = ['fetch']
|
cmd = ['fetch']
|
||||||
|
|
||||||
|
if clone_filter:
|
||||||
|
git_require((2, 19, 0), fail=True, msg='partial clones')
|
||||||
|
cmd.append('--filter=%s' % clone_filter)
|
||||||
|
self.config.SetString('extensions.partialclone', self.remote.name)
|
||||||
|
|
||||||
if depth:
|
if depth:
|
||||||
cmd.append('--depth=%s' % depth)
|
cmd.append('--depth=%s' % depth)
|
||||||
else:
|
else:
|
||||||
@ -2150,12 +2158,12 @@ class Project(object):
|
|||||||
return self._RemoteFetch(name=name,
|
return self._RemoteFetch(name=name,
|
||||||
current_branch_only=current_branch_only,
|
current_branch_only=current_branch_only,
|
||||||
initial=False, quiet=quiet, alt_dir=alt_dir,
|
initial=False, quiet=quiet, alt_dir=alt_dir,
|
||||||
depth=None)
|
depth=None, clone_filter=clone_filter)
|
||||||
else:
|
else:
|
||||||
# Avoid infinite recursion: sync all branches with depth set to None
|
# Avoid infinite recursion: sync all branches with depth set to None
|
||||||
return self._RemoteFetch(name=name, current_branch_only=False,
|
return self._RemoteFetch(name=name, current_branch_only=False,
|
||||||
initial=False, quiet=quiet, alt_dir=alt_dir,
|
initial=False, quiet=quiet, alt_dir=alt_dir,
|
||||||
depth=None)
|
depth=None, clone_filter=clone_filter)
|
||||||
|
|
||||||
return ok
|
return ok
|
||||||
|
|
||||||
|
7
repo
7
repo
@ -199,6 +199,13 @@ group.add_option('--dissociate',
|
|||||||
group.add_option('--depth', type='int', default=None,
|
group.add_option('--depth', type='int', default=None,
|
||||||
dest='depth',
|
dest='depth',
|
||||||
help='create a shallow clone with given depth; see git clone')
|
help='create a shallow clone with given depth; see git clone')
|
||||||
|
group.add_option('--partial-clone', action='store_true',
|
||||||
|
dest='partial_clone',
|
||||||
|
help='perform partial clone (https://git-scm.com/'
|
||||||
|
'docs/gitrepository-layout#_code_partialclone_code)')
|
||||||
|
group.add_option('--clone-filter', action='store', default='blob:none',
|
||||||
|
dest='clone_filter',
|
||||||
|
help='filter for use with --partial-clone [default: %default]')
|
||||||
group.add_option('--archive',
|
group.add_option('--archive',
|
||||||
dest='archive', action='store_true',
|
dest='archive', action='store_true',
|
||||||
help='checkout an archive instead of a git repository for '
|
help='checkout an archive instead of a git repository for '
|
||||||
|
@ -115,6 +115,13 @@ to update the working directory files.
|
|||||||
g.add_option('--depth', type='int', default=None,
|
g.add_option('--depth', type='int', default=None,
|
||||||
dest='depth',
|
dest='depth',
|
||||||
help='create a shallow clone with given depth; see git clone')
|
help='create a shallow clone with given depth; see git clone')
|
||||||
|
g.add_option('--partial-clone', action='store_true',
|
||||||
|
dest='partial_clone',
|
||||||
|
help='perform partial clone (https://git-scm.com/'
|
||||||
|
'docs/gitrepository-layout#_code_partialclone_code)')
|
||||||
|
g.add_option('--clone-filter', action='store', default='blob:none',
|
||||||
|
dest='clone_filter',
|
||||||
|
help='filter for use with --partial-clone [default: %default]')
|
||||||
g.add_option('--archive',
|
g.add_option('--archive',
|
||||||
dest='archive', action='store_true',
|
dest='archive', action='store_true',
|
||||||
help='checkout an archive instead of a git repository for '
|
help='checkout an archive instead of a git repository for '
|
||||||
@ -253,13 +260,25 @@ to update the working directory files.
|
|||||||
'in another location.', file=sys.stderr)
|
'in another location.', file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
if opt.partial_clone:
|
||||||
|
if opt.mirror:
|
||||||
|
print('fatal: --mirror and --partial-clone are mutually exclusive',
|
||||||
|
file=sys.stderr)
|
||||||
|
sys.exit(1)
|
||||||
|
m.config.SetString('repo.partialclone', 'true')
|
||||||
|
if opt.clone_filter:
|
||||||
|
m.config.SetString('repo.clonefilter', opt.clone_filter)
|
||||||
|
else:
|
||||||
|
opt.clone_filter = None
|
||||||
|
|
||||||
if opt.submodules:
|
if opt.submodules:
|
||||||
m.config.SetString('repo.submodules', 'true')
|
m.config.SetString('repo.submodules', 'true')
|
||||||
|
|
||||||
if not m.Sync_NetworkHalf(is_new=is_new, quiet=opt.quiet,
|
if not m.Sync_NetworkHalf(is_new=is_new, quiet=opt.quiet,
|
||||||
clone_bundle=not opt.no_clone_bundle,
|
clone_bundle=not opt.no_clone_bundle,
|
||||||
current_branch_only=opt.current_branch_only,
|
current_branch_only=opt.current_branch_only,
|
||||||
no_tags=opt.no_tags, submodules=opt.submodules):
|
no_tags=opt.no_tags, submodules=opt.submodules,
|
||||||
|
clone_filter=opt.clone_filter):
|
||||||
r = m.GetRemote(m.remote.name)
|
r = m.GetRemote(m.remote.name)
|
||||||
print('fatal: cannot obtain manifest %s' % r.url, file=sys.stderr)
|
print('fatal: cannot obtain manifest %s' % r.url, file=sys.stderr)
|
||||||
|
|
||||||
|
176
subcmds/sync.py
176
subcmds/sync.py
@ -85,6 +85,9 @@ class _FetchError(Exception):
|
|||||||
"""Internal error thrown in _FetchHelper() when we don't want stack trace."""
|
"""Internal error thrown in _FetchHelper() when we don't want stack trace."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class _CheckoutError(Exception):
|
||||||
|
"""Internal error thrown in _CheckoutOne() when we don't want stack trace."""
|
||||||
|
|
||||||
class Sync(Command, MirrorSafeCommand):
|
class Sync(Command, MirrorSafeCommand):
|
||||||
jobs = 1
|
jobs = 1
|
||||||
common = True
|
common = True
|
||||||
@ -266,7 +269,7 @@ later is required to fix a server side protocol bug.
|
|||||||
help=SUPPRESS_HELP)
|
help=SUPPRESS_HELP)
|
||||||
|
|
||||||
def _FetchProjectList(self, opt, projects, sem, *args, **kwargs):
|
def _FetchProjectList(self, opt, projects, sem, *args, **kwargs):
|
||||||
"""Main function of the fetch threads when jobs are > 1.
|
"""Main function of the fetch threads.
|
||||||
|
|
||||||
Delegates most of the work to _FetchHelper.
|
Delegates most of the work to _FetchHelper.
|
||||||
|
|
||||||
@ -286,7 +289,8 @@ later is required to fix a server side protocol bug.
|
|||||||
finally:
|
finally:
|
||||||
sem.release()
|
sem.release()
|
||||||
|
|
||||||
def _FetchHelper(self, opt, project, lock, fetched, pm, err_event):
|
def _FetchHelper(self, opt, project, lock, fetched, pm, err_event,
|
||||||
|
clone_filter):
|
||||||
"""Fetch git objects for a single project.
|
"""Fetch git objects for a single project.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -300,6 +304,7 @@ later is required to fix a server side protocol bug.
|
|||||||
lock held).
|
lock held).
|
||||||
err_event: We'll set this event in the case of an error (after printing
|
err_event: We'll set this event in the case of an error (after printing
|
||||||
out info about the error).
|
out info about the error).
|
||||||
|
clone_filter: Filter for use in a partial clone.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Whether the fetch was successful.
|
Whether the fetch was successful.
|
||||||
@ -312,7 +317,6 @@ later is required to fix a server side protocol bug.
|
|||||||
|
|
||||||
# Encapsulate everything in a try/except/finally so that:
|
# Encapsulate everything in a try/except/finally so that:
|
||||||
# - We always set err_event in the case of an exception.
|
# - We always set err_event in the case of an exception.
|
||||||
# - We always make sure we call sem.release().
|
|
||||||
# - We always make sure we unlock the lock if we locked it.
|
# - We always make sure we unlock the lock if we locked it.
|
||||||
start = time.time()
|
start = time.time()
|
||||||
success = False
|
success = False
|
||||||
@ -325,7 +329,8 @@ later is required to fix a server side protocol bug.
|
|||||||
clone_bundle=not opt.no_clone_bundle,
|
clone_bundle=not opt.no_clone_bundle,
|
||||||
no_tags=opt.no_tags, archive=self.manifest.IsArchive,
|
no_tags=opt.no_tags, archive=self.manifest.IsArchive,
|
||||||
optimized_fetch=opt.optimized_fetch,
|
optimized_fetch=opt.optimized_fetch,
|
||||||
prune=opt.prune)
|
prune=opt.prune,
|
||||||
|
clone_filter=clone_filter)
|
||||||
self._fetch_times.Set(project, time.time() - start)
|
self._fetch_times.Set(project, time.time() - start)
|
||||||
|
|
||||||
# Lock around all the rest of the code, since printing, updating a set
|
# Lock around all the rest of the code, since printing, updating a set
|
||||||
@ -389,7 +394,8 @@ later is required to fix a server side protocol bug.
|
|||||||
lock=lock,
|
lock=lock,
|
||||||
fetched=fetched,
|
fetched=fetched,
|
||||||
pm=pm,
|
pm=pm,
|
||||||
err_event=err_event)
|
err_event=err_event,
|
||||||
|
clone_filter=self.manifest.CloneFilter)
|
||||||
if self.jobs > 1:
|
if self.jobs > 1:
|
||||||
t = _threading.Thread(target = self._FetchProjectList,
|
t = _threading.Thread(target = self._FetchProjectList,
|
||||||
kwargs = kwargs)
|
kwargs = kwargs)
|
||||||
@ -416,6 +422,148 @@ later is required to fix a server side protocol bug.
|
|||||||
|
|
||||||
return fetched
|
return fetched
|
||||||
|
|
||||||
|
def _CheckoutWorker(self, opt, sem, project, *args, **kwargs):
|
||||||
|
"""Main function of the fetch threads.
|
||||||
|
|
||||||
|
Delegates most of the work to _CheckoutOne.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
opt: Program options returned from optparse. See _Options().
|
||||||
|
projects: Projects to fetch.
|
||||||
|
sem: We'll release() this semaphore when we exit so that another thread
|
||||||
|
can be started up.
|
||||||
|
*args, **kwargs: Remaining arguments to pass to _CheckoutOne. See the
|
||||||
|
_CheckoutOne docstring for details.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
success = self._CheckoutOne(opt, project, *args, **kwargs)
|
||||||
|
if not success:
|
||||||
|
sys.exit(1)
|
||||||
|
finally:
|
||||||
|
sem.release()
|
||||||
|
|
||||||
|
def _CheckoutOne(self, opt, project, lock, pm, err_event):
  """Checkout work tree for one project.

  Args:
    opt: Program options returned from optparse.  See _Options().
    project: Project object for the project to checkout.
    lock: Lock for accessing objects that are shared amongst multiple
        _CheckoutWorker() threads.
    pm: Progress object.  We will call pm.update() (with our lock held) once
        the checkout succeeded.
    err_event: We'll set this event in the case of an error (after printing
        out info about the error).

  Returns:
    Whether the checkout was successful.
  """
  # We'll set to true once we've locked the lock.
  did_lock = False

  if not opt.quiet:
    print('Checking out project %s' % project.name)

  # Encapsulate everything in a try/except/finally so that:
  # - We always set err_event in the case of an exception.
  # - We always make sure we unlock the lock if we locked it.
  # - We always record the sync event, successful or not.
  start = time.time()
  syncbuf = SyncBuffer(self.manifest.manifestProject.config,
                       detach_head=opt.detach_head)
  success = False
  try:
    project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync)
    success = syncbuf.Finish()

    # Lock around all the rest of the code, since printing, updating a set
    # and Progress.update() are not thread safe.
    lock.acquire()
    did_lock = True

    if not success:
      err_event.set()
      print('error: Cannot checkout %s' % (project.name),
            file=sys.stderr)
      raise _CheckoutError()

    pm.update()
  except _CheckoutError:
    # Already reported above; just fall through and return False.
    pass
  except Exception as e:
    print('error: Cannot checkout %s: %s: %s' %
          (project.name, type(e).__name__, str(e)),
          file=sys.stderr)
    err_event.set()
    raise
  finally:
    if did_lock:
      lock.release()
    finish = time.time()
    self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL,
                           start, finish, success)

  return success
|
||||||
|
|
||||||
|
def _Checkout(self, all_projects, opt):
  """Checkout projects listed in all_projects.

  Exits the process with status 1 if any checkout reported an error.

  Args:
    all_projects: List of all projects that should be checked out.
    opt: Program options returned from optparse.  See _Options().
  """

  # Perform checkouts in multiple threads when we are using partial clone.
  # Without partial clone, all needed git objects are already downloaded,
  # in this situation it's better to use only one process because the checkout
  # would be mostly disk I/O; with partial clone, the objects are only
  # downloaded when demanded (at checkout time), which is similar to the
  # Sync_NetworkHalf case and parallelism would be helpful.
  if self.manifest.CloneFilter:
    syncjobs = self.jobs
  else:
    syncjobs = 1

  lock = _threading.Lock()
  pm = Progress('Syncing work tree', len(all_projects))

  threads = set()
  sem = _threading.Semaphore(syncjobs)
  err_event = _threading.Event()

  for project in all_projects:
    # Check for any errors before running any more tasks.
    # ...we'll let existing threads finish, though.
    if err_event.isSet() and not opt.force_broken:
      break

    # Projects without a work tree have nothing to check out.  Skip them
    # before taking a semaphore slot: only _CheckoutWorker releases the
    # semaphore, so a slot acquired for a skipped project would never be
    # returned and this loop would eventually block forever.
    if not project.worktree:
      continue

    sem.acquire()
    kwargs = dict(opt=opt,
                  sem=sem,
                  project=project,
                  lock=lock,
                  pm=pm,
                  err_event=err_event)
    if syncjobs > 1:
      t = _threading.Thread(target=self._CheckoutWorker,
                            kwargs=kwargs)
      # Ensure that Ctrl-C will not freeze the repo process.
      t.daemon = True
      threads.add(t)
      t.start()
    else:
      self._CheckoutWorker(**kwargs)

  for t in threads:
    t.join()

  pm.end()
  # If we saw an error, exit with code 1 so that other scripts can check.
  if err_event.isSet():
    print('\nerror: Exited sync due to checkout errors', file=sys.stderr)
    sys.exit(1)
|
||||||
|
|
||||||
def _GCProjects(self, projects):
|
def _GCProjects(self, projects):
|
||||||
gc_gitdirs = {}
|
gc_gitdirs = {}
|
||||||
for project in projects:
|
for project in projects:
|
||||||
@ -746,7 +894,8 @@ later is required to fix a server side protocol bug.
|
|||||||
current_branch_only=opt.current_branch_only,
|
current_branch_only=opt.current_branch_only,
|
||||||
no_tags=opt.no_tags,
|
no_tags=opt.no_tags,
|
||||||
optimized_fetch=opt.optimized_fetch,
|
optimized_fetch=opt.optimized_fetch,
|
||||||
submodules=self.manifest.HasSubmodules)
|
submodules=self.manifest.HasSubmodules,
|
||||||
|
clone_filter=self.manifest.CloneFilter)
|
||||||
finish = time.time()
|
finish = time.time()
|
||||||
self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK,
|
self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK,
|
||||||
start, finish, success)
|
start, finish, success)
|
||||||
@ -846,20 +995,7 @@ later is required to fix a server side protocol bug.
|
|||||||
if self.UpdateProjectList(opt):
|
if self.UpdateProjectList(opt):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
syncbuf = SyncBuffer(mp.config,
|
self._Checkout(all_projects, opt)
|
||||||
detach_head = opt.detach_head)
|
|
||||||
pm = Progress('Syncing work tree', len(all_projects))
|
|
||||||
for project in all_projects:
|
|
||||||
pm.update()
|
|
||||||
if project.worktree:
|
|
||||||
start = time.time()
|
|
||||||
project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync)
|
|
||||||
self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL,
|
|
||||||
start, time.time(), syncbuf.Recently())
|
|
||||||
pm.end()
|
|
||||||
print(file=sys.stderr)
|
|
||||||
if not syncbuf.Finish():
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# If there's a notice that's supposed to print at the end of the sync, print
|
# If there's a notice that's supposed to print at the end of the sync, print
|
||||||
# it now...
|
# it now...
|
||||||
|
Loading…
Reference in New Issue
Block a user