sync: Order projects according to last fetch time

Some projects may consistently take longer to fetch than others; for
example, a more active project may have many more Gerrit changes than a
less active project, and those take longer to transfer. Use a simple
heuristic based on the last fetch time to fetch slower projects first,
so we do not tend to spend the end of the sync fetching a small number
of outliers.

This algorithm is probably not optimal, and due to inter-run latency
variance and Python thread scheduling, we may not even have good
estimates of a project's sync time.

Change-Id: I9a463f214b3ed742e4d807c42925b62cb8b1745b
This commit is contained in:
Dave Borowitz 2012-10-23 15:00:54 -07:00
parent 5c6eeac8f0
commit 67700e9b90

View File

@ -16,6 +16,7 @@
import netrc
from optparse import SUPPRESS_HELP
import os
import pickle
import re
import shutil
import socket
@ -47,6 +48,8 @@ from error import RepoChangedException, GitError
from project import SyncBuffer
from progress import Progress
# Number of seconds in one day (24 hours * 60 minutes * 60 seconds).
_ONE_DAY_S = 24 * 60 * 60
class _FetchError(Exception):
  """Internal failure signal for _FetchHelper() when no stack trace is wanted."""
@ -212,10 +215,12 @@ later is required to fix a server side protocol bug.
# - We always make sure we unlock the lock if we locked it.
try:
try:
start = time.time()
success = project.Sync_NetworkHalf(
quiet=opt.quiet,
current_branch_only=opt.current_branch_only,
clone_bundle=not opt.no_clone_bundle)
self._fetch_times.Set(project, time.time() - start)
# Lock around all the rest of the code, since printing, updating a set
# and Progress.update() are not thread safe.
@ -293,6 +298,7 @@ later is required to fix a server side protocol bug.
sys.exit(1)
pm.end()
self._fetch_times.Save()
for project in projects:
project.bare_git.gc('--auto')
return fetched
@ -496,12 +502,15 @@ uncommitted changes are present' % project.relpath
self.jobs = self.manifest.default.sync_j
all_projects = self.GetProjects(args, missing_ok=True)
self._fetch_times = _FetchTimes(self.manifest)
if not opt.local_only:
to_fetch = []
now = time.time()
if (24 * 60 * 60) <= (now - rp.LastFetch):
if _ONE_DAY_S <= (now - rp.LastFetch):
to_fetch.append(rp)
to_fetch.extend(all_projects)
to_fetch.sort(key=self._fetch_times.Get, reverse=True)
self._fetch_times.Clear()
self._Fetch(to_fetch, opt)
_PostRepoFetch(rp, opt.no_repo_verify)
@ -602,3 +611,53 @@ warning: Cannot automatically authenticate repo."""
print >>sys.stderr
return False
return True
class _FetchTimes(object):
  """Persistent map from project name to the duration of its last fetch.

  The map is pickled to '.repopickle_fetchtimes' inside the manifest's
  repodir and is loaded lazily on first access. All disk access is
  best-effort: a missing, unreadable or corrupt file yields an empty map,
  and a failed save is silently dropped.
  """

  def __init__(self, manifest):
    """Record the storage location; nothing is read from disk yet.

    Args:
      manifest: object whose `repodir` attribute is the directory that
        holds the fetch-times file.
    """
    self._path = os.path.join(manifest.repodir, '.repopickle_fetchtimes')
    # None means "not loaded yet"; a dict (possibly empty) means loaded/reset.
    self._times = None

  def Clear(self):
    """Forget all recorded times in memory (the file is left untouched)."""
    self._times = {}

  def Get(self, project):
    """Return the recorded fetch time for `project`.

    Args:
      project: object with a `name` attribute used as the lookup key.

    Returns:
      The stored time, or _ONE_DAY_S when the project has no record.
    """
    self._Load()
    return self._times.get(project.name, _ONE_DAY_S)

  def Set(self, project, t):
    """Record `t` as the fetch time for `project`.

    Args:
      project: object with a `name` attribute used as the key.
      t: the value to store (the caller passes elapsed seconds).
    """
    # Make sure the map exists even if Set() is the first call made on this
    # instance; previously this raised TypeError on the None placeholder.
    self._Load()
    self._times[project.name] = t

  def _RemoveFile(self):
    """Delete the on-disk file, ignoring any failure to do so."""
    try:
      os.remove(self._path)
    except OSError:
      pass

  def _Load(self):
    """Populate the in-memory map from disk if it is not already set.

    Returns:
      The in-memory dict of fetch times.
    """
    if self._times is not None:
      return self._times

    self._times = {}
    try:
      # Binary mode to match the 'wb' used by Save().
      f = open(self._path, 'rb')
    except (IOError, OSError):
      return self._times

    # NOTE(review): pickle.load() can execute arbitrary code from the file;
    # this is only acceptable while the file is trusted local state.
    loaded = None
    try:
      try:
        loaded = pickle.load(f)
      except Exception:
        # Any decoding failure is treated as a corrupt cache. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        loaded = None
    finally:
      f.close()

    if isinstance(loaded, dict):
      self._times = loaded
    else:
      # Corrupt or unexpected payload: drop it (after closing the handle)
      # so the next Save() starts from a clean file.
      self._RemoveFile()
    return self._times

  def Save(self):
    """Write the in-memory map to disk; a no-op if nothing was loaded or set.

    Failures to open or write the file are swallowed, and any partially
    written file is removed.
    """
    if self._times is None:
      return
    try:
      f = open(self._path, 'wb')
      try:
        pickle.dump(self._times, f)
      finally:
        # Only reached once open() succeeded, so `f` is always bound here.
        f.close()
    except (IOError, OSError, pickle.PickleError):
      self._RemoveFile()