From 67700e9b90a38cc3719c818bc967153e8b7b429e Mon Sep 17 00:00:00 2001 From: Dave Borowitz Date: Tue, 23 Oct 2012 15:00:54 -0700 Subject: [PATCH] sync: Order projects according to last fetch time Some projects may consistently take longer to fetch than others, for example a more active project may have many more Gerrit changes than a less active project, which take longer to transfer. Use a simple heuristic based on the last fetch time to fetch slower projects first, so we do not tend to spend the end of the sync fetching a small number of outliers. This algorithm is probably not optimal, and due to inter-run latency variance and Python thread scheduling, we may not even have good estimates of a project sync time. Change-Id: I9a463f214b3ed742e4d807c42925b62cb8b1745b --- subcmds/sync.py | 61 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/subcmds/sync.py b/subcmds/sync.py index e68a025e..a8022d9d 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -16,6 +16,7 @@ import netrc from optparse import SUPPRESS_HELP import os +import pickle import re import shutil import socket @@ -47,6 +48,8 @@ from error import RepoChangedException, GitError from project import SyncBuffer from progress import Progress +_ONE_DAY_S = 24 * 60 * 60 + class _FetchError(Exception): """Internal error thrown in _FetchHelper() when we don't want stack trace.""" pass @@ -212,10 +215,12 @@ later is required to fix a server side protocol bug. # - We always make sure we unlock the lock if we locked it. try: try: + start = time.time() success = project.Sync_NetworkHalf( quiet=opt.quiet, current_branch_only=opt.current_branch_only, clone_bundle=not opt.no_clone_bundle) + self._fetch_times.Set(project, time.time() - start) # Lock around all the rest of the code, since printing, updating a set # and Progress.update() are not thread safe. @@ -293,6 +298,7 @@ later is required to fix a server side protocol bug. 
class _FetchTimes(object):
    """Persistent record of how long each project took to fetch.

    Times are keyed by ``project.name`` and stored as a pickled dict in
    ``<manifest.repodir>/.repopickle_fetchtimes``.  The data is only a
    scheduling hint, so a missing, unreadable, corrupt or unwritable cache
    file is treated as "no data" instead of being raised to the caller.
    """

    def __init__(self, manifest):
        """Record the cache location; the file is not read until needed.

        Args:
          manifest: object exposing a ``repodir`` attribute (a directory path).
        """
        self._path = os.path.join(manifest.repodir, '.repopickle_fetchtimes')
        # None means "not loaded yet"; a dict (possibly empty) means loaded.
        self._times = None

    def Clear(self):
        """Drop every in-memory time; the file on disk is left untouched."""
        self._times = {}

    def Get(self, project):
        """Return the recorded fetch time for ``project``.

        Args:
          project: object exposing a ``name`` attribute.

        Returns:
          The value last stored for ``project.name``, or ``_ONE_DAY_S`` when
          nothing is recorded for it.
        """
        self._Load()
        return self._times.get(project.name, _ONE_DAY_S)

    def Set(self, project, t):
        """Record ``t`` as the fetch time of ``project``.

        Args:
          project: object exposing a ``name`` attribute.
          t: the fetch duration to remember.
        """
        # Load lazily so that calling Set() before Clear()/Get() updates the
        # existing data instead of failing on the None placeholder.
        if self._times is None:
            self._Load()
        self._times[project.name] = t

    def _Load(self):
        """Populate ``self._times`` from disk on first use and return it.

        Returns:
          The dict of recorded times; empty when the cache file is missing,
          unreadable, or does not contain a pickled dict.
        """
        if self._times is not None:
            return self._times

        try:
            # Binary mode: Save() writes with 'wb', and pickle data is bytes.
            f = open(self._path, 'rb')
        except (IOError, OSError):
            self._times = {}
            return self._times

        try:
            try:
                # NOTE(review): unpickling can execute arbitrary code; this
                # assumes the cache file is as trusted as the rest of the
                # directory it lives in -- confirm.
                loaded = pickle.load(f)
            except Exception:
                # A damaged pickle can raise almost any exception type.
                # Unlike a bare "except:", this lets KeyboardInterrupt and
                # SystemExit propagate.
                loaded = None
        finally:
            f.close()

        if isinstance(loaded, dict):
            self._times = loaded
        else:
            # Remove the bad file only after closing it, so the removal also
            # works on platforms that refuse to delete open files.
            self._Discard()
            self._times = {}
        return self._times

    def Save(self):
        """Write the in-memory times to disk.

        Does nothing when no times were ever loaded or set.  Failures to open
        or write the file are swallowed and any partial file is removed.
        """
        if self._times is None:
            return
        try:
            f = open(self._path, 'wb')
            try:
                pickle.dump(self._times, f)
            finally:
                # Close only a file that was actually opened.
                f.close()
        except (IOError, OSError, pickle.PickleError):
            self._Discard()

    def _Discard(self):
        """Delete the cache file, ignoring the case where it cannot be removed."""
        try:
            os.remove(self._path)
        except OSError:
            pass