From d947858325ae70ff9c0b2f463a9e8c4ffd00002a Mon Sep 17 00:00:00 2001 From: Dave Borowitz Date: Tue, 23 Oct 2012 16:35:39 -0700 Subject: [PATCH] sync: Keep a moving average of last fetch times Try to more accurately estimate which projects take the longest to sync by keeping an exponentially weighted moving average (a=0.5) of fetch times, rather than just recording the last observation. This should discount individual outliers (e.g. an unusually large project update) and hopefully allow truly slow repos to bubble to the top. Change-Id: I72b2508cb1266e8a19cf15b616d8a7fc08098cb3 --- subcmds/sync.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/subcmds/sync.py b/subcmds/sync.py index a8022d9d..b83f2d4a 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -510,7 +510,6 @@ uncommitted changes are present' % project.relpath to_fetch.append(rp) to_fetch.extend(all_projects) to_fetch.sort(key=self._fetch_times.Get, reverse=True) - self._fetch_times.Clear() self._Fetch(to_fetch, opt) _PostRepoFetch(rp, opt.no_repo_verify) @@ -613,19 +612,24 @@ warning: Cannot automatically authenticate repo.""" return True class _FetchTimes(object): + _ALPHA = 0.5 + def __init__(self, manifest): self._path = os.path.join(manifest.repodir, '.repopickle_fetchtimes') self._times = None - - def Clear(self): - self._times = {} + self._seen = set() def Get(self, project): self._Load() return self._times.get(project.name, _ONE_DAY_S) def Set(self, project, t): - self._times[project.name] = t + self._Load() + name = project.name + old = self._times.get(name, t) + self._seen.add(name) + a = self._ALPHA + self._times[name] = (a*t) + ((1-a) * old) def _Load(self): if self._times is None: @@ -650,6 +654,14 @@ class _FetchTimes(object): def Save(self): if self._times is None: return + + to_delete = [] + for name in self._times: + if name not in self._seen: + to_delete.append(name) + for name in to_delete: + del self._times[name] + try: f = open(self._path, 'wb') try: