From 19e409c81863878d5d313fdc40b3975b98602454 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 5 May 2021 19:44:35 -0400 Subject: [PATCH] ssh: move proxy usage to the sync subcommand The only time we really need ssh proxies is when we want to run many connections and reuse them. That only happens when running sync. Every other command makes at most two connections, and even then it's only one or none. So the effort of setting up & tearing down ssh proxies isn't worth it most of the time. The big reason we want to move this logic to sync is that it's now using multiprocessing for parallel work. The current ssh proxy code is all based on threads, which means none of the logic is working correctly. The current ssh design makes it hard to fix when all of the state lives in the global/module scope. So the first step to fixing this is to move the setup & teardown to the one place that really needs it: sync. No other commands will use proxies anymore, just direct connections. Bug: https://crbug.com/gerrit/12389 Change-Id: Ibd351acdec39a87562b3013637c5df4ea34e03c6 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/305485 Reviewed-by: Chris Mcdonald Tested-by: Mike Frysinger --- git_config.py | 8 ++++++++ main.py | 25 ++++++++++--------------- project.py | 11 ++++++----- subcmds/sync.py | 10 ++++++++-- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/git_config.py b/git_config.py index 1d8d1363..d7fef8ca 100644 --- a/git_config.py +++ b/git_config.py @@ -520,6 +520,14 @@ class Remote(object): return self.url.replace(longest, longestUrl, 1) def PreConnectFetch(self): + """Run any setup for this remote before we connect to it. + + In practice, if the remote is using SSH, we'll attempt to create a new + SSH master session to it for reuse across projects. + + Returns: + Whether the preconnect phase for this remote was successful. 
+ """ connectionUrl = self._InsteadOf() return ssh.preconnect(connectionUrl) diff --git a/main.py b/main.py index 96744335..e399ddce 100755 --- a/main.py +++ b/main.py @@ -56,7 +56,6 @@ from error import RepoChangedException import gitc_utils from manifest_xml import GitcClient, RepoClient from pager import RunPager, TerminatePager -import ssh from wrapper import WrapperPath, Wrapper from subcmds import all_commands @@ -592,20 +591,16 @@ def _Main(argv): repo = _Repo(opt.repodir) try: - try: - ssh.init() - init_http() - name, gopts, argv = repo._ParseArgs(argv) - run = lambda: repo._Run(name, gopts, argv) or 0 - if gopts.trace_python: - import trace - tracer = trace.Trace(count=False, trace=True, timing=True, - ignoredirs=set(sys.path[1:])) - result = tracer.runfunc(run) - else: - result = run() - finally: - ssh.close() + init_http() + name, gopts, argv = repo._ParseArgs(argv) + run = lambda: repo._Run(name, gopts, argv) or 0 + if gopts.trace_python: + import trace + tracer = trace.Trace(count=False, trace=True, timing=True, + ignoredirs=set(sys.path[1:])) + result = tracer.runfunc(run) + else: + result = run() except KeyboardInterrupt: print('aborted by user', file=sys.stderr) result = 1 diff --git a/project.py b/project.py index 7b5b56d4..37558061 100644 --- a/project.py +++ b/project.py @@ -1050,6 +1050,7 @@ class Project(object): retry_fetches=0, prune=False, submodules=False, + ssh_proxy=None, clone_filter=None, partial_clone_exclude=set()): """Perform only the network IO portion of the sync process. 
@@ -1143,6 +1144,7 @@ class Project(object): alt_dir=alt_dir, current_branch_only=current_branch_only, tags=tags, prune=prune, depth=depth, submodules=submodules, force_sync=force_sync, + ssh_proxy=ssh_proxy, clone_filter=clone_filter, retry_fetches=retry_fetches): return False @@ -1994,6 +1996,7 @@ class Project(object): prune=False, depth=None, submodules=False, + ssh_proxy=None, force_sync=False, clone_filter=None, retry_fetches=2, @@ -2041,16 +2044,14 @@ class Project(object): if not name: name = self.remote.name - ssh_proxy = False remote = self.GetRemote(name) - if remote.PreConnectFetch(): - ssh_proxy = True + if not remote.PreConnectFetch(): + ssh_proxy = False if initial: if alt_dir and 'objects' == os.path.basename(alt_dir): ref_dir = os.path.dirname(alt_dir) packed_refs = os.path.join(self.gitdir, 'packed-refs') - remote = self.GetRemote(name) all_refs = self.bare_ref.all ids = set(all_refs.values()) @@ -2238,7 +2239,7 @@ class Project(object): name=name, quiet=quiet, verbose=verbose, output_redir=output_redir, current_branch_only=current_branch_only and depth, initial=False, alt_dir=alt_dir, - depth=None, clone_filter=clone_filter) + depth=None, ssh_proxy=ssh_proxy, clone_filter=clone_filter) return ok diff --git a/subcmds/sync.py b/subcmds/sync.py index 6f5b5644..28568062 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -57,6 +57,7 @@ from error import RepoChangedException, GitError, ManifestParseError import platform_utils from project import SyncBuffer from progress import Progress +import ssh from wrapper import Wrapper from manifest_xml import GitcManifest @@ -357,6 +358,7 @@ later is required to fix a server side protocol bug. optimized_fetch=opt.optimized_fetch, retry_fetches=opt.retry_fetches, prune=opt.prune, + ssh_proxy=True, clone_filter=self.manifest.CloneFilter, partial_clone_exclude=self.manifest.PartialCloneExclude) @@ -983,8 +985,12 @@ later is required to fix a server side protocol bug. 
self._fetch_times = _FetchTimes(self.manifest) if not opt.local_only: - self._FetchMain(opt, args, all_projects, err_event, manifest_name, - load_local_manifests) + try: + ssh.init() + self._FetchMain(opt, args, all_projects, err_event, manifest_name, + load_local_manifests) + finally: + ssh.close() # If we saw an error, exit with code 1 so that other scripts can check. if err_event.is_set():