From 188572170e8cdf28df55a5ca90ed70d14b72b804 Mon Sep 17 00:00:00 2001
From: Dave Borowitz
Date: Tue, 23 Oct 2012 17:02:59 -0700
Subject: [PATCH] sync: Run gc --auto in parallel

We can't just let this run wild with a high (or even low) -j, since
that would hose a system. Instead, limit the total number of threads
across all git gc subprocesses to the number of CPUs reported by the
multiprocessing module (available in Python 2.6 and above).

Change-Id: Icca0161a1e6116ffa5f7cfc6f5faecda510a7fb9
---
 subcmds/sync.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/subcmds/sync.py b/subcmds/sync.py
index b83f2d4a..9e4a9754 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -39,6 +39,11 @@ except ImportError:
   def _rlimit_nofile():
     return (256, 256)
 
+try:
+  import multiprocessing
+except ImportError:
+  multiprocessing = None
+
 from git_command import GIT
 from git_refs import R_HEADS, HEAD
 from project import Project
@@ -299,10 +304,75 @@ later is required to fix a server side protocol bug.
 
     pm.end()
     self._fetch_times.Save()
-    for project in projects:
-      project.bare_git.gc('--auto')
+
+    self._GCProjects(projects)
     return fetched
 
+  def _GCProjects(self, projects):
+    """Run 'git gc --auto' on every project, in parallel when possible.
+
+    At most min(self.jobs, CPU count) gc calls run at once, and each one
+    is passed pack.threads = CPU count // jobs so that the total number
+    of gc threads stays within the number of CPUs.
+
+    Args:
+      projects: iterable of projects whose bare_git is to be gc'd.
+
+    Exits the process with status 1 if any gc run in parallel fails.
+    """
+    cpu_count = 1
+    if multiprocessing:
+      try:
+        cpu_count = multiprocessing.cpu_count()
+      except NotImplementedError:
+        # The platform cannot report its CPU count; stay serial.
+        pass
+    jobs = min(self.jobs, cpu_count)
+
+    if jobs < 2:
+      for project in projects:
+        project.bare_git.gc('--auto')
+      return
+
+    # jobs <= cpu_count here, so every gc gets at least one thread.  Floor
+    # division keeps the value an integer on every Python version.
+    config = {'pack.threads': cpu_count // jobs}
+
+    threads = set()
+    sem = _threading.Semaphore(jobs)
+    err_event = _threading.Event()
+
+    def GC(project):
+      try:
+        try:
+          project.bare_git.gc('--auto', config=config)
+        except GitError:
+          err_event.set()
+        except:
+          # Record the failure so no further gc is started, then let the
+          # unexpected exception surface from this thread.
+          err_event.set()
+          raise
+      finally:
+        # Always free the slot, or the scheduling loop below would block.
+        sem.release()
+
+    for project in projects:
+      if err_event.isSet():
+        break
+      sem.acquire()
+      t = _threading.Thread(target=GC, args=(project,))
+      t.daemon = True
+      threads.add(t)
+      t.start()
+
+    for t in threads:
+      t.join()
+
+    if err_event.isSet():
+      print >>sys.stderr, '\nerror: Exited sync due to gc errors'
+      sys.exit(1)
+
   def UpdateProjectList(self):
     new_project_paths = []
     for project in self.GetProjects(None, missing_ok=True):