sync: add retry to fetch operations

Add retries with exponential backoff and jitter to the fetch
operations. By default, behavior is unchanged; the retries are
enabled via the new flag '--retry-fetches'.

Bug: https://crbug.com/1061473

Change-Id: I492710843985d00f81cbe3402dc56f2d21a45b35
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/261576
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: George Engelbrecht <engeg@google.com>
This commit is contained in:
George Engelbrecht 2020-04-02 12:36:09 -06:00 committed by Mike Frysinger
parent b4a6f6d798
commit 9bc283e49b
2 changed files with 46 additions and 9 deletions

View File

@ -55,6 +55,12 @@ else:
input = raw_input # noqa: F821 input = raw_input # noqa: F821
# Maximum sleep time allowed during retries.
MAXIMUM_RETRY_SLEEP_SEC = 3600.0
# +-10% random jitter is added to each fetch's retry sleep duration.
RETRY_JITTER_PERCENT = 0.1
def _lwrite(path, content): def _lwrite(path, content):
lock = '%s.lock' % path lock = '%s.lock' % path
@ -875,6 +881,7 @@ class Project(object):
is_derived=False, is_derived=False,
dest_branch=None, dest_branch=None,
optimized_fetch=False, optimized_fetch=False,
retry_fetches=0,
old_revision=None): old_revision=None):
"""Init a Project object. """Init a Project object.
@ -901,6 +908,8 @@ class Project(object):
dest_branch: The branch to which to push changes for review by default. dest_branch: The branch to which to push changes for review by default.
optimized_fetch: If True, when a project is set to a sha1 revision, only optimized_fetch: If True, when a project is set to a sha1 revision, only
fetch from the remote if the sha1 is not present locally. fetch from the remote if the sha1 is not present locally.
retry_fetches: Retry remote fetches n times upon receiving transient error
with exponential backoff and jitter.
old_revision: saved git commit id for open GITC projects. old_revision: saved git commit id for open GITC projects.
""" """
self.manifest = manifest self.manifest = manifest
@ -936,6 +945,7 @@ class Project(object):
self.use_git_worktrees = use_git_worktrees self.use_git_worktrees = use_git_worktrees
self.is_derived = is_derived self.is_derived = is_derived
self.optimized_fetch = optimized_fetch self.optimized_fetch = optimized_fetch
self.retry_fetches = max(0, retry_fetches)
self.subprojects = [] self.subprojects = []
self.snapshots = {} self.snapshots = {}
@ -1449,6 +1459,7 @@ class Project(object):
tags=True, tags=True,
archive=False, archive=False,
optimized_fetch=False, optimized_fetch=False,
retry_fetches=0,
prune=False, prune=False,
submodules=False, submodules=False,
clone_filter=None): clone_filter=None):
@ -1532,7 +1543,7 @@ class Project(object):
current_branch_only=current_branch_only, current_branch_only=current_branch_only,
tags=tags, prune=prune, depth=depth, tags=tags, prune=prune, depth=depth,
submodules=submodules, force_sync=force_sync, submodules=submodules, force_sync=force_sync,
clone_filter=clone_filter): clone_filter=clone_filter, retry_fetches=retry_fetches):
return False return False
mp = self.manifest.manifestProject mp = self.manifest.manifestProject
@ -2334,8 +2345,10 @@ class Project(object):
depth=None, depth=None,
submodules=False, submodules=False,
force_sync=False, force_sync=False,
clone_filter=None): clone_filter=None,
retry_fetches=2,
retry_sleep_initial_sec=4.0,
retry_exp_factor=2.0):
is_sha1 = False is_sha1 = False
tag_name = None tag_name = None
# The depth should not be used when fetching to a mirror because # The depth should not be used when fetching to a mirror because
@ -2497,18 +2510,37 @@ class Project(object):
cmd.extend(spec) cmd.extend(spec)
ok = False # At least one retry minimum due to git remote prune.
for _i in range(2): retry_fetches = max(retry_fetches, 2)
retry_cur_sleep = retry_sleep_initial_sec
ok = prune_tried = False
for try_n in range(retry_fetches):
gitcmd = GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy, gitcmd = GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy,
merge_output=True, capture_stdout=quiet) merge_output=True, capture_stdout=quiet)
ret = gitcmd.Wait() ret = gitcmd.Wait()
if ret == 0: if ret == 0:
ok = True ok = True
break break
# If needed, run the 'git remote prune' the first time through the loop
elif (not _i and # Retry later due to HTTP 429 Too Many Requests.
"error:" in gitcmd.stderr and elif ('error:' in gitcmd.stderr and
"git remote prune" in gitcmd.stderr): 'HTTP 429' in gitcmd.stderr):
if not quiet:
print('429 received, sleeping: %s sec' % retry_cur_sleep,
file=sys.stderr)
time.sleep(retry_cur_sleep)
retry_cur_sleep = min(retry_exp_factor * retry_cur_sleep,
MAXIMUM_RETRY_SLEEP_SEC)
retry_cur_sleep *= (1 - random.uniform(-RETRY_JITTER_PERCENT,
RETRY_JITTER_PERCENT))
continue
# If this is not last attempt, try 'git remote prune'.
elif (try_n < retry_fetches - 1 and
'error:' in gitcmd.stderr and
'git remote prune' in gitcmd.stderr and
not prune_tried):
prune_tried = True
prunecmd = GitCommand(self, ['remote', 'prune', name], bare=True, prunecmd = GitCommand(self, ['remote', 'prune', name], bare=True,
ssh_proxy=ssh_proxy) ssh_proxy=ssh_proxy)
ret = prunecmd.Wait() ret = prunecmd.Wait()

View File

@ -265,6 +265,9 @@ later is required to fix a server side protocol bug.
p.add_option('--optimized-fetch', p.add_option('--optimized-fetch',
dest='optimized_fetch', action='store_true', dest='optimized_fetch', action='store_true',
help='only fetch projects fixed to sha1 if revision does not exist locally') help='only fetch projects fixed to sha1 if revision does not exist locally')
p.add_option('--retry-fetches',
default=0, action='store', type='int',
help='number of times to retry fetches on transient errors')
p.add_option('--prune', dest='prune', action='store_true', p.add_option('--prune', dest='prune', action='store_true',
help='delete refs that no longer exist on the remote') help='delete refs that no longer exist on the remote')
if show_smart: if show_smart:
@ -342,6 +345,7 @@ later is required to fix a server side protocol bug.
clone_bundle=opt.clone_bundle, clone_bundle=opt.clone_bundle,
tags=opt.tags, archive=self.manifest.IsArchive, tags=opt.tags, archive=self.manifest.IsArchive,
optimized_fetch=opt.optimized_fetch, optimized_fetch=opt.optimized_fetch,
retry_fetches=opt.retry_fetches,
prune=opt.prune, prune=opt.prune,
clone_filter=clone_filter) clone_filter=clone_filter)
self._fetch_times.Set(project, time.time() - start) self._fetch_times.Set(project, time.time() - start)
@ -777,6 +781,7 @@ later is required to fix a server side protocol bug.
current_branch_only=opt.current_branch_only, current_branch_only=opt.current_branch_only,
tags=opt.tags, tags=opt.tags,
optimized_fetch=opt.optimized_fetch, optimized_fetch=opt.optimized_fetch,
retry_fetches=opt.retry_fetches,
submodules=self.manifest.HasSubmodules, submodules=self.manifest.HasSubmodules,
clone_filter=self.manifest.CloneFilter) clone_filter=self.manifest.CloneFilter)
finish = time.time() finish = time.time()