From 355f4398d870573255574c895f808485310b5c10 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 20 Jul 2022 17:15:29 -0400 Subject: [PATCH] sync: rework --jobs to provide better defaults For --jobs-network, the logic is now: * If the user specifies --jobs-network, use that. * Else, if the user specifies --jobs, use that. * Else, if the manifest specifies sync-j, use that. * Else, default to 1. Then we limit the jobs count based on the softlimit RLIMIT_NOFILE. For --jobs-checkout, the logic is now: * If the user specifies --jobs-checkout, use that. * Else, if the user specifies --jobs, use that. * Else, if the manifest specifies sync-j, use that. * Else, default to DEFAULT_LOCAL_JOBS which is based on user's ncpus. Then we limit the jobs count based on the softlimit RLIMIT_NOFILE. For garbage collecting, the logic is now: * If the user specifies --jobs, use that. * Else, if the manifest specifies sync-j, use that. * Else, default to the user's ncpus. Then we limit the jobs count based on the softlimit RLIMIT_NOFILE. Having to factor in the manifest settings makes this more complicated which is why we delay processing of defaults until after we've synced the manifest projects. Bug: http://b/239712300 Change-Id: Id27cda63c76c156f1d63f6a20cb2c4ceeb3d547c Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/341394 Tested-by: Mike Frysinger Reviewed-by: LaMont Jones --- manifest_xml.py | 8 +++--- subcmds/sync.py | 67 ++++++++++++++++++++++++++++++------------------- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/manifest_xml.py b/manifest_xml.py index 12614c64..ee513a7e 100644 --- a/manifest_xml.py +++ b/manifest_xml.py @@ -123,7 +123,7 @@ class _Default(object): destBranchExpr = None upstreamExpr = None remote = None - sync_j = 1 + sync_j = None sync_c = False sync_s = False sync_tags = True @@ -548,7 +548,7 @@ https://gerrit.googlesource.com/git-repo/+/HEAD/docs/manifest-format.md if d.upstreamExpr: have_default = True e.setAttribute('upstream', d.upstreamExpr) - if d.sync_j > 1: + if d.sync_j is not None: have_default = True e.setAttribute('sync-j', '%d' % d.sync_j) if d.sync_c: @@ -1462,8 +1462,8 @@ https://gerrit.googlesource.com/git-repo/+/HEAD/docs/manifest-format.md d.destBranchExpr = node.getAttribute('dest-branch') or None d.upstreamExpr = node.getAttribute('upstream') or None - d.sync_j = XmlInt(node, 'sync-j', 1) - if d.sync_j <= 0: + d.sync_j = XmlInt(node, 'sync-j', None) + if d.sync_j is not None and d.sync_j <= 0: raise ManifestParseError('%s: sync-j must be greater than 0, not "%s"' % (self.manifestFile, d.sync_j)) diff --git a/subcmds/sync.py b/subcmds/sync.py index fa61d551..57ee0540 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -52,7 +52,7 @@ import git_superproject import gitc_utils from project import Project from project import RemoteSpec -from command import Command, MirrorSafeCommand, WORKER_BATCH_SIZE +from command import Command, DEFAULT_LOCAL_JOBS, MirrorSafeCommand, WORKER_BATCH_SIZE from error import RepoChangedException, GitError, ManifestParseError import platform_utils from project import SyncBuffer @@ -65,7 +65,6 @@ _ONE_DAY_S = 24 * 60 * 60 class Sync(Command, MirrorSafeCommand): - jobs = 1 COMMON = True MULTI_MANIFEST_SUPPORT = True helpSummary = "Update working tree to the latest revision" @@ -168,21 +167,16 @@ If the remote SSH daemon is Gerrit Code Review, version 2.0.10 or later is required to fix a server side protocol bug. """ - PARALLEL_JOBS = 1 - - def _CommonOptions(self, p): - if self.outer_client and self.outer_client.manifest: - try: - self.PARALLEL_JOBS = self.outer_client.manifest.default.sync_j - except ManifestParseError: - pass - super()._CommonOptions(p) + # A value of 0 means we want parallel jobs, but we'll determine the default + # value later on. + PARALLEL_JOBS = 0 def _Options(self, p, show_smart=True): p.add_option('--jobs-network', default=None, type=int, metavar='JOBS', - help='number of network jobs to run in parallel (defaults to --jobs)') + help='number of network jobs to run in parallel (defaults to --jobs or 1)') p.add_option('--jobs-checkout', default=None, type=int, metavar='JOBS', - help='number of local checkout jobs to run in parallel (defaults to --jobs)') + help='number of local checkout jobs to run in parallel (defaults to --jobs or ' + f'{DEFAULT_LOCAL_JOBS})') p.add_option('-f', '--force-broken', dest='force_broken', action='store_true', @@ -451,7 +445,7 @@ later is required to fix a server side protocol bug. def _Fetch(self, projects, opt, err_event, ssh_proxy): ret = True - jobs = opt.jobs_network if opt.jobs_network else self.jobs + jobs = opt.jobs_network fetched = set() pm = Progress('Fetching', len(projects), delay=False, quiet=opt.quiet) @@ -651,7 +645,7 @@ later is required to fix a server side protocol bug. return ret return self.ExecuteInParallel( - opt.jobs_checkout if opt.jobs_checkout else self.jobs, + opt.jobs_checkout, functools.partial(self._CheckoutOne, opt.detach_head, opt.force_sync), all_projects, callback=_ProcessResults, @@ -691,8 +685,7 @@ later is required to fix a server side protocol bug. project.bare_git, ) - cpu_count = os.cpu_count() - jobs = min(self.jobs, cpu_count) + jobs = opt.jobs if jobs < 2: for (run_gc, bare_git) in tidy_dirs.values(): @@ -704,6 +697,7 @@ later is required to fix a server side protocol bug. pm.end() return + cpu_count = os.cpu_count() config = {'pack.threads': cpu_count // jobs if cpu_count > jobs else 1} threads = set() @@ -1011,9 +1005,6 @@ later is required to fix a server side protocol bug. sys.exit(1) self._ReloadManifest(manifest_name, mp.manifest) - if opt.jobs is None: - self.jobs = mp.manifest.default.sync_j - def ValidateOptions(self, opt, args): if opt.force_broken: print('warning: -f/--force-broken is now the default behavior, and the ' @@ -1036,12 +1027,6 @@ later is required to fix a server side protocol bug. opt.prune = True def Execute(self, opt, args): - if opt.jobs: - self.jobs = opt.jobs - if self.jobs > 1: - soft_limit, _ = _rlimit_nofile() - self.jobs = min(self.jobs, (soft_limit - 5) // 3) - manifest = self.outer_manifest if not opt.outer_manifest: manifest = self.manifest @@ -1092,6 +1077,36 @@ later is required to fix a server side protocol bug. else: print('Skipping update of local manifest project.') + # Now that the manifests are up-to-date, setup the jobs value. + if opt.jobs is None: + # User has not made a choice, so use the manifest settings. + opt.jobs = mp.default.sync_j + if opt.jobs is not None: + # Neither user nor manifest have made a choice. + if opt.jobs_network is None: + opt.jobs_network = opt.jobs + if opt.jobs_checkout is None: + opt.jobs_checkout = opt.jobs + # Setup defaults if jobs==0. + if not opt.jobs: + if not opt.jobs_network: + opt.jobs_network = 1 + if not opt.jobs_checkout: + opt.jobs_checkout = DEFAULT_LOCAL_JOBS + opt.jobs = os.cpu_count() + + # Try to stay under user rlimit settings. + # + # Since each worker requires at 3 file descriptors to run `git fetch`, use + # that to scale down the number of jobs. Unfortunately there isn't an easy + # way to determine this reliably as systems change, but it was last measured + # by hand in 2011. + soft_limit, _ = _rlimit_nofile() + jobs_soft_limit = max(1, (soft_limit - 5) // 3) + opt.jobs = min(opt.jobs, jobs_soft_limit) + opt.jobs_network = min(opt.jobs_network, jobs_soft_limit) + opt.jobs_checkout = min(opt.jobs_checkout, jobs_soft_limit) + superproject_logging_data = {} self._UpdateProjectsRevisionId(opt, args, superproject_logging_data, manifest)