sync: add retry to fetch operations

Add retries with exponential backoff and jitter to the fetch
operations. By default don't change behavior and enable
behind the new flag '--fetch-retries'.

Bug: https://crbug.com/1061473

Change-Id: I492710843985d00f81cbe3402dc56f2d21a45b35
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/261576
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: George Engelbrecht <engeg@google.com>
This commit is contained in:
George Engelbrecht 2020-04-02 12:36:09 -06:00 committed by Mike Frysinger
parent b4a6f6d798
commit 9bc283e49b
2 changed files with 46 additions and 9 deletions

View File

@ -55,6 +55,12 @@ else:
input = raw_input # noqa: F821
# Maximum sleep time allowed during retries.
MAXIMUM_RETRY_SLEEP_SEC = 3600.0
# +-10% random jitter is added to each Fetches retry sleep duration.
RETRY_JITTER_PERCENT = 0.1
def _lwrite(path, content):
lock = '%s.lock' % path
@ -875,6 +881,7 @@ class Project(object):
is_derived=False,
dest_branch=None,
optimized_fetch=False,
retry_fetches=0,
old_revision=None):
"""Init a Project object.
@ -901,6 +908,8 @@ class Project(object):
dest_branch: The branch to which to push changes for review by default.
optimized_fetch: If True, when a project is set to a sha1 revision, only
fetch from the remote if the sha1 is not present locally.
retry_fetches: Retry remote fetches n times upon receiving transient error
with exponential backoff and jitter.
old_revision: saved git commit id for open GITC projects.
"""
self.manifest = manifest
@ -936,6 +945,7 @@ class Project(object):
self.use_git_worktrees = use_git_worktrees
self.is_derived = is_derived
self.optimized_fetch = optimized_fetch
self.retry_fetches = max(0, retry_fetches)
self.subprojects = []
self.snapshots = {}
@ -1449,6 +1459,7 @@ class Project(object):
tags=True,
archive=False,
optimized_fetch=False,
retry_fetches=0,
prune=False,
submodules=False,
clone_filter=None):
@ -1532,7 +1543,7 @@ class Project(object):
current_branch_only=current_branch_only,
tags=tags, prune=prune, depth=depth,
submodules=submodules, force_sync=force_sync,
clone_filter=clone_filter):
clone_filter=clone_filter, retry_fetches=retry_fetches):
return False
mp = self.manifest.manifestProject
@ -2334,8 +2345,10 @@ class Project(object):
depth=None,
submodules=False,
force_sync=False,
clone_filter=None):
clone_filter=None,
retry_fetches=2,
retry_sleep_initial_sec=4.0,
retry_exp_factor=2.0):
is_sha1 = False
tag_name = None
# The depth should not be used when fetching to a mirror because
@ -2497,18 +2510,37 @@ class Project(object):
cmd.extend(spec)
ok = False
for _i in range(2):
# At least one retry minimum due to git remote prune.
retry_fetches = max(retry_fetches, 2)
retry_cur_sleep = retry_sleep_initial_sec
ok = prune_tried = False
for try_n in range(retry_fetches):
gitcmd = GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy,
merge_output=True, capture_stdout=quiet)
ret = gitcmd.Wait()
if ret == 0:
ok = True
break
# If needed, run the 'git remote prune' the first time through the loop
elif (not _i and
"error:" in gitcmd.stderr and
"git remote prune" in gitcmd.stderr):
# Retry later due to HTTP 429 Too Many Requests.
elif ('error:' in gitcmd.stderr and
'HTTP 429' in gitcmd.stderr):
if not quiet:
print('429 received, sleeping: %s sec' % retry_cur_sleep,
file=sys.stderr)
time.sleep(retry_cur_sleep)
retry_cur_sleep = min(retry_exp_factor * retry_cur_sleep,
MAXIMUM_RETRY_SLEEP_SEC)
retry_cur_sleep *= (1 - random.uniform(-RETRY_JITTER_PERCENT,
RETRY_JITTER_PERCENT))
continue
# If this is not last attempt, try 'git remote prune'.
elif (try_n < retry_fetches - 1 and
'error:' in gitcmd.stderr and
'git remote prune' in gitcmd.stderr and
not prune_tried):
prune_tried = True
prunecmd = GitCommand(self, ['remote', 'prune', name], bare=True,
ssh_proxy=ssh_proxy)
ret = prunecmd.Wait()

View File

@ -265,6 +265,9 @@ later is required to fix a server side protocol bug.
p.add_option('--optimized-fetch',
dest='optimized_fetch', action='store_true',
help='only fetch projects fixed to sha1 if revision does not exist locally')
p.add_option('--retry-fetches',
default=0, action='store', type='int',
help='number of times to retry fetches on transient errors')
p.add_option('--prune', dest='prune', action='store_true',
help='delete refs that no longer exist on the remote')
if show_smart:
@ -342,6 +345,7 @@ later is required to fix a server side protocol bug.
clone_bundle=opt.clone_bundle,
tags=opt.tags, archive=self.manifest.IsArchive,
optimized_fetch=opt.optimized_fetch,
retry_fetches=opt.retry_fetches,
prune=opt.prune,
clone_filter=clone_filter)
self._fetch_times.Set(project, time.time() - start)
@ -777,6 +781,7 @@ later is required to fix a server side protocol bug.
current_branch_only=opt.current_branch_only,
tags=opt.tags,
optimized_fetch=opt.optimized_fetch,
retry_fetches=opt.retry_fetches,
submodules=self.manifest.HasSubmodules,
clone_filter=self.manifest.CloneFilter)
finish = time.time()