sync: Run gc --auto in parallel

We can't just let this run wild with a high (or even low) -j, since
that would hose a system. Instead, limit the total number of threads
across all git gc subprocesses to the number of CPUs reported by the
multiprocessing module (available in Python 2.6 and above).

Change-Id: Icca0161a1e6116ffa5f7cfc6f5faecda510a7fb9
This commit is contained in:
Dave Borowitz 2012-10-23 17:02:59 -07:00
parent 091f893625
commit 188572170e

View File

@@ -39,6 +39,11 @@ except ImportError:
def _rlimit_nofile():
return (256, 256)
try:
import multiprocessing
except ImportError:
multiprocessing = None
from git_command import GIT
from git_refs import R_HEADS, HEAD
from project import Project
@@ -299,9 +304,55 @@ later is required to fix a server side protocol bug.
pm.end()
self._fetch_times.Save()
self._GCProjects(projects)
return fetched
def _GCProjects(self, projects):
  """Run 'git gc --auto' for every project, in parallel when possible.

  At most min(self.jobs, cpu_count) gc subprocesses run concurrently,
  and pack.threads is divided among them so the aggregate thread count
  across all gc processes stays at the CPU count reported by the
  multiprocessing module.

  Args:
    projects: iterable of Project objects whose bare repos get gc'd.

  Exits the whole process with status 1 if any gc invocation fails.
  """
  if multiprocessing:
    cpu_count = multiprocessing.cpu_count()
  else:
    # multiprocessing is unavailable (Python < 2.6); assume a single CPU.
    cpu_count = 1
  jobs = min(self.jobs, cpu_count)

  if jobs < 2:
    # Not worth the thread machinery; gc each project serially.
    # (Fixed: a stray 'return fetched' from a neighboring diff hunk was
    # dropped here -- 'fetched' is not defined in this method's scope.)
    for project in projects:
      project.bare_git.gc('--auto')
    return

  # Split the CPU budget across the concurrent gc subprocesses so we do
  # not oversubscribe the machine.  jobs <= cpu_count by construction,
  # so the quotient is at least 1 when cpu_count > jobs.
  config = {'pack.threads': cpu_count / jobs if cpu_count > jobs else 1}

  threads = set()
  sem = _threading.Semaphore(jobs)     # caps concurrent gc subprocesses
  err_event = _threading.Event()       # set by any worker that fails

  def GC(project):
    try:
      try:
        project.bare_git.gc('--auto', config=config)
      except GitError:
        err_event.set()
      except:
        # Unexpected failure: record it, then let it propagate so the
        # traceback is not silently lost.
        err_event.set()
        raise
    finally:
      sem.release()

  for project in projects:
    if err_event.isSet():
      # A gc already failed; stop launching new workers.
      break
    sem.acquire()
    t = _threading.Thread(target=GC, args=(project,))
    t.daemon = True
    threads.add(t)
    t.start()

  for t in threads:
    t.join()

  if err_event.isSet():
    print >>sys.stderr, '\nerror: Exited sync due to gc errors'
    sys.exit(1)
def UpdateProjectList(self):
new_project_paths = []