subcmds/sync: Use pack-refs instead of gc for redundant gitdirs.

Previously `git gc` was run on every gitdir, even when several gitdirs
shared the same objects. Instead, run `git gc` only once per shared
object directory and run `git pack-refs` on the remaining gitdirs that
were not gc'ed.

Bug: https://crbug.com/gerrit/15113
Test: repo sync -j # and check that git pack-refs is called
Change-Id: Icff37ab3ec78cfb44391d8cc7f2d875991532320
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/320275
Tested-by: Allen Webb <allenwebb@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
This commit is contained in:
Allen Webb 2021-10-07 10:42:38 -05:00
parent 0f6f16ed17
commit 4ee4a45d03

View File

@ -605,7 +605,7 @@ later is required to fix a server side protocol bug.
pm = Progress('Garbage collecting', len(projects), delay=False, quiet=opt.quiet) pm = Progress('Garbage collecting', len(projects), delay=False, quiet=opt.quiet)
pm.update(inc=0, msg='prescan') pm.update(inc=0, msg='prescan')
gc_gitdirs = {} tidy_dirs = {}
for project in projects: for project in projects:
# Make sure pruning never kicks in with shared projects. # Make sure pruning never kicks in with shared projects.
if (not project.use_git_worktrees and if (not project.use_git_worktrees and
@ -623,17 +623,28 @@ later is required to fix a server side protocol bug.
file=sys.stderr) file=sys.stderr)
project.config.SetString('gc.pruneExpire', 'never') project.config.SetString('gc.pruneExpire', 'never')
project.config.SetString('gc.autoDetach', 'false') project.config.SetString('gc.autoDetach', 'false')
gc_gitdirs[project.gitdir] = project.bare_git # Only call git gc once per objdir, but call pack-refs for the remainder.
if project.objdir not in tidy_dirs:
pm.update(inc=len(projects) - len(gc_gitdirs), msg='warming up') tidy_dirs[project.objdir] = (
True, # Run a full gc.
project.bare_git,
)
elif project.gitdir not in tidy_dirs:
tidy_dirs[project.gitdir] = (
False, # Do not run a full gc; just run pack-refs.
project.bare_git,
)
cpu_count = os.cpu_count() cpu_count = os.cpu_count()
jobs = min(self.jobs, cpu_count) jobs = min(self.jobs, cpu_count)
if jobs < 2: if jobs < 2:
for bare_git in gc_gitdirs.values(): for (run_gc, bare_git) in tidy_dirs.values():
pm.update(msg=bare_git._project.name) pm.update(msg=bare_git._project.name)
if run_gc:
bare_git.gc('--auto') bare_git.gc('--auto')
else:
bare_git.pack_refs()
pm.end() pm.end()
return return
@ -642,11 +653,14 @@ later is required to fix a server side protocol bug.
threads = set() threads = set()
sem = _threading.Semaphore(jobs) sem = _threading.Semaphore(jobs)
def GC(bare_git): def tidy_up(run_gc, bare_git):
pm.start(bare_git._project.name) pm.start(bare_git._project.name)
try: try:
try: try:
if run_gc:
bare_git.gc('--auto', config=config) bare_git.gc('--auto', config=config)
else:
bare_git.pack_refs(config=config)
except GitError: except GitError:
err_event.set() err_event.set()
except Exception: except Exception:
@ -656,11 +670,11 @@ later is required to fix a server side protocol bug.
pm.finish(bare_git._project.name) pm.finish(bare_git._project.name)
sem.release() sem.release()
for bare_git in gc_gitdirs.values(): for (run_gc, bare_git) in tidy_dirs.values():
if err_event.is_set() and opt.fail_fast: if err_event.is_set() and opt.fail_fast:
break break
sem.acquire() sem.acquire()
t = _threading.Thread(target=GC, args=(bare_git,)) t = _threading.Thread(target=tidy_up, args=(run_gc, bare_git,))
t.daemon = True t.daemon = True
threads.add(t) threads.add(t)
t.start() t.start()