From 9bc283e49bcb2663dc8c06a4efad289a3683aaa4 Mon Sep 17 00:00:00 2001 From: George Engelbrecht Date: Thu, 2 Apr 2020 12:36:09 -0600 Subject: [PATCH] sync: add retry to fetch operations Add retries with exponential backoff and jitter to the fetch operations. By default don't change behavior and enable behind the new flag '--retry-fetches'. Bug: https://crbug.com/1061473 Change-Id: I492710843985d00f81cbe3402dc56f2d21a45b35 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/261576 Reviewed-by: Mike Frysinger Tested-by: George Engelbrecht --- project.py | 50 ++++++++++++++++++++++++++++++++++++++++--------- subcmds/sync.py | 5 +++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/project.py b/project.py index d35ad52d..691e0d9b 100644 --- a/project.py +++ b/project.py @@ -55,6 +55,12 @@ else: input = raw_input # noqa: F821 +# Maximum sleep time allowed during retries. +MAXIMUM_RETRY_SLEEP_SEC = 3600.0 +# +-10% random jitter is added to each Fetches retry sleep duration. +RETRY_JITTER_PERCENT = 0.1 + + def _lwrite(path, content): lock = '%s.lock' % path @@ -875,6 +881,7 @@ class Project(object): is_derived=False, dest_branch=None, optimized_fetch=False, + retry_fetches=0, old_revision=None): """Init a Project object. @@ -901,6 +908,8 @@ class Project(object): dest_branch: The branch to which to push changes for review by default. optimized_fetch: If True, when a project is set to a sha1 revision, only fetch from the remote if the sha1 is not present locally. + retry_fetches: Retry remote fetches n times upon receiving transient error + with exponential backoff and jitter. old_revision: saved git commit id for open GITC projects. 
""" self.manifest = manifest @@ -936,6 +945,7 @@ class Project(object): self.use_git_worktrees = use_git_worktrees self.is_derived = is_derived self.optimized_fetch = optimized_fetch + self.retry_fetches = max(0, retry_fetches) self.subprojects = [] self.snapshots = {} @@ -1449,6 +1459,7 @@ class Project(object): tags=True, archive=False, optimized_fetch=False, + retry_fetches=0, prune=False, submodules=False, clone_filter=None): @@ -1532,7 +1543,7 @@ class Project(object): current_branch_only=current_branch_only, tags=tags, prune=prune, depth=depth, submodules=submodules, force_sync=force_sync, - clone_filter=clone_filter): + clone_filter=clone_filter, retry_fetches=retry_fetches): return False mp = self.manifest.manifestProject @@ -2334,8 +2345,10 @@ class Project(object): depth=None, submodules=False, force_sync=False, - clone_filter=None): - + clone_filter=None, + retry_fetches=2, + retry_sleep_initial_sec=4.0, + retry_exp_factor=2.0): is_sha1 = False tag_name = None # The depth should not be used when fetching to a mirror because @@ -2497,18 +2510,37 @@ class Project(object): cmd.extend(spec) - ok = False - for _i in range(2): + # At least one retry minimum due to git remote prune. + retry_fetches = max(retry_fetches, 2) + retry_cur_sleep = retry_sleep_initial_sec + ok = prune_tried = False + for try_n in range(retry_fetches): gitcmd = GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy, merge_output=True, capture_stdout=quiet) ret = gitcmd.Wait() if ret == 0: ok = True break - # If needed, run the 'git remote prune' the first time through the loop - elif (not _i and - "error:" in gitcmd.stderr and - "git remote prune" in gitcmd.stderr): + + # Retry later due to HTTP 429 Too Many Requests. 
+ elif ('error:' in gitcmd.stderr and + 'HTTP 429' in gitcmd.stderr): + if not quiet: + print('429 received, sleeping: %s sec' % retry_cur_sleep, + file=sys.stderr) + time.sleep(retry_cur_sleep) + retry_cur_sleep = min(retry_exp_factor * retry_cur_sleep, + MAXIMUM_RETRY_SLEEP_SEC) + retry_cur_sleep *= (1 - random.uniform(-RETRY_JITTER_PERCENT, + RETRY_JITTER_PERCENT)) + continue + + # If this is not last attempt, try 'git remote prune'. + elif (try_n < retry_fetches - 1 and + 'error:' in gitcmd.stderr and + 'git remote prune' in gitcmd.stderr and + not prune_tried): + prune_tried = True prunecmd = GitCommand(self, ['remote', 'prune', name], bare=True, ssh_proxy=ssh_proxy) ret = prunecmd.Wait() diff --git a/subcmds/sync.py b/subcmds/sync.py index de6deecb..efd39616 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -265,6 +265,9 @@ later is required to fix a server side protocol bug. p.add_option('--optimized-fetch', dest='optimized_fetch', action='store_true', help='only fetch projects fixed to sha1 if revision does not exist locally') + p.add_option('--retry-fetches', + default=0, action='store', type='int', + help='number of times to retry fetches on transient errors') p.add_option('--prune', dest='prune', action='store_true', help='delete refs that no longer exist on the remote') if show_smart: @@ -342,6 +345,7 @@ later is required to fix a server side protocol bug. clone_bundle=opt.clone_bundle, tags=opt.tags, archive=self.manifest.IsArchive, optimized_fetch=opt.optimized_fetch, + retry_fetches=opt.retry_fetches, prune=opt.prune, clone_filter=clone_filter) self._fetch_times.Set(project, time.time() - start) @@ -777,6 +781,7 @@ later is required to fix a server side protocol bug. current_branch_only=opt.current_branch_only, tags=opt.tags, optimized_fetch=opt.optimized_fetch, + retry_fetches=opt.retry_fetches, submodules=self.manifest.HasSubmodules, clone_filter=self.manifest.CloneFilter) finish = time.time()