sync: rework --jobs to provide better defaults

For --jobs-network, the logic is now:
* If the user specifies --jobs-network, use that.
* Else, if the user specifies --jobs, use that.
* Else, if the manifest specifies sync-j, use that.
* Else, default to 1.
Then we limit the jobs count based on the softlimit RLIMIT_NOFILE.

For --jobs-checkout, the logic is now:
* If the user specifies --jobs-checkout, use that.
* Else, if the user specifies --jobs, use that.
* Else, if the manifest specifies sync-j, use that.
* Else, default to DEFAULT_LOCAL_JOBS which is based on user's ncpus.
Then we limit the jobs count based on the softlimit RLIMIT_NOFILE.

For garbage collecting, the logic is now:
* If the user specifies --jobs, use that.
* Else, if the manifest specifies sync-j, use that.
* Else, default to the user's ncpus.
Then we limit the jobs count based on the softlimit RLIMIT_NOFILE.

Having to factor in the manifest settings makes this more complicated
which is why we delay processing of defaults until after we've synced
the manifest projects.

Bug: http://b/239712300
Change-Id: Id27cda63c76c156f1d63f6a20cb2c4ceeb3d547c
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/341394
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: LaMont Jones <lamontjones@google.com>
This commit is contained in:
Mike Frysinger 2022-07-20 17:15:29 -04:00
parent bddc964d93
commit 355f4398d8
2 changed files with 45 additions and 30 deletions

View File

@ -123,7 +123,7 @@ class _Default(object):
destBranchExpr = None destBranchExpr = None
upstreamExpr = None upstreamExpr = None
remote = None remote = None
sync_j = 1 sync_j = None
sync_c = False sync_c = False
sync_s = False sync_s = False
sync_tags = True sync_tags = True
@ -548,7 +548,7 @@ https://gerrit.googlesource.com/git-repo/+/HEAD/docs/manifest-format.md
if d.upstreamExpr: if d.upstreamExpr:
have_default = True have_default = True
e.setAttribute('upstream', d.upstreamExpr) e.setAttribute('upstream', d.upstreamExpr)
if d.sync_j > 1: if d.sync_j is not None:
have_default = True have_default = True
e.setAttribute('sync-j', '%d' % d.sync_j) e.setAttribute('sync-j', '%d' % d.sync_j)
if d.sync_c: if d.sync_c:
@ -1462,8 +1462,8 @@ https://gerrit.googlesource.com/git-repo/+/HEAD/docs/manifest-format.md
d.destBranchExpr = node.getAttribute('dest-branch') or None d.destBranchExpr = node.getAttribute('dest-branch') or None
d.upstreamExpr = node.getAttribute('upstream') or None d.upstreamExpr = node.getAttribute('upstream') or None
d.sync_j = XmlInt(node, 'sync-j', 1) d.sync_j = XmlInt(node, 'sync-j', None)
if d.sync_j <= 0: if d.sync_j is not None and d.sync_j <= 0:
raise ManifestParseError('%s: sync-j must be greater than 0, not "%s"' % raise ManifestParseError('%s: sync-j must be greater than 0, not "%s"' %
(self.manifestFile, d.sync_j)) (self.manifestFile, d.sync_j))

View File

@ -52,7 +52,7 @@ import git_superproject
import gitc_utils import gitc_utils
from project import Project from project import Project
from project import RemoteSpec from project import RemoteSpec
from command import Command, MirrorSafeCommand, WORKER_BATCH_SIZE from command import Command, DEFAULT_LOCAL_JOBS, MirrorSafeCommand, WORKER_BATCH_SIZE
from error import RepoChangedException, GitError, ManifestParseError from error import RepoChangedException, GitError, ManifestParseError
import platform_utils import platform_utils
from project import SyncBuffer from project import SyncBuffer
@ -65,7 +65,6 @@ _ONE_DAY_S = 24 * 60 * 60
class Sync(Command, MirrorSafeCommand): class Sync(Command, MirrorSafeCommand):
jobs = 1
COMMON = True COMMON = True
MULTI_MANIFEST_SUPPORT = True MULTI_MANIFEST_SUPPORT = True
helpSummary = "Update working tree to the latest revision" helpSummary = "Update working tree to the latest revision"
@ -168,21 +167,16 @@ If the remote SSH daemon is Gerrit Code Review, version 2.0.10 or
later is required to fix a server side protocol bug. later is required to fix a server side protocol bug.
""" """
PARALLEL_JOBS = 1 # A value of 0 means we want parallel jobs, but we'll determine the default
# value later on.
def _CommonOptions(self, p): PARALLEL_JOBS = 0
if self.outer_client and self.outer_client.manifest:
try:
self.PARALLEL_JOBS = self.outer_client.manifest.default.sync_j
except ManifestParseError:
pass
super()._CommonOptions(p)
def _Options(self, p, show_smart=True): def _Options(self, p, show_smart=True):
p.add_option('--jobs-network', default=None, type=int, metavar='JOBS', p.add_option('--jobs-network', default=None, type=int, metavar='JOBS',
help='number of network jobs to run in parallel (defaults to --jobs)') help='number of network jobs to run in parallel (defaults to --jobs or 1)')
p.add_option('--jobs-checkout', default=None, type=int, metavar='JOBS', p.add_option('--jobs-checkout', default=None, type=int, metavar='JOBS',
help='number of local checkout jobs to run in parallel (defaults to --jobs)') help='number of local checkout jobs to run in parallel (defaults to --jobs or '
f'{DEFAULT_LOCAL_JOBS})')
p.add_option('-f', '--force-broken', p.add_option('-f', '--force-broken',
dest='force_broken', action='store_true', dest='force_broken', action='store_true',
@ -451,7 +445,7 @@ later is required to fix a server side protocol bug.
def _Fetch(self, projects, opt, err_event, ssh_proxy): def _Fetch(self, projects, opt, err_event, ssh_proxy):
ret = True ret = True
jobs = opt.jobs_network if opt.jobs_network else self.jobs jobs = opt.jobs_network
fetched = set() fetched = set()
pm = Progress('Fetching', len(projects), delay=False, quiet=opt.quiet) pm = Progress('Fetching', len(projects), delay=False, quiet=opt.quiet)
@ -651,7 +645,7 @@ later is required to fix a server side protocol bug.
return ret return ret
return self.ExecuteInParallel( return self.ExecuteInParallel(
opt.jobs_checkout if opt.jobs_checkout else self.jobs, opt.jobs_checkout,
functools.partial(self._CheckoutOne, opt.detach_head, opt.force_sync), functools.partial(self._CheckoutOne, opt.detach_head, opt.force_sync),
all_projects, all_projects,
callback=_ProcessResults, callback=_ProcessResults,
@ -691,8 +685,7 @@ later is required to fix a server side protocol bug.
project.bare_git, project.bare_git,
) )
cpu_count = os.cpu_count() jobs = opt.jobs
jobs = min(self.jobs, cpu_count)
if jobs < 2: if jobs < 2:
for (run_gc, bare_git) in tidy_dirs.values(): for (run_gc, bare_git) in tidy_dirs.values():
@ -704,6 +697,7 @@ later is required to fix a server side protocol bug.
pm.end() pm.end()
return return
cpu_count = os.cpu_count()
config = {'pack.threads': cpu_count // jobs if cpu_count > jobs else 1} config = {'pack.threads': cpu_count // jobs if cpu_count > jobs else 1}
threads = set() threads = set()
@ -1011,9 +1005,6 @@ later is required to fix a server side protocol bug.
sys.exit(1) sys.exit(1)
self._ReloadManifest(manifest_name, mp.manifest) self._ReloadManifest(manifest_name, mp.manifest)
if opt.jobs is None:
self.jobs = mp.manifest.default.sync_j
def ValidateOptions(self, opt, args): def ValidateOptions(self, opt, args):
if opt.force_broken: if opt.force_broken:
print('warning: -f/--force-broken is now the default behavior, and the ' print('warning: -f/--force-broken is now the default behavior, and the '
@ -1036,12 +1027,6 @@ later is required to fix a server side protocol bug.
opt.prune = True opt.prune = True
def Execute(self, opt, args): def Execute(self, opt, args):
if opt.jobs:
self.jobs = opt.jobs
if self.jobs > 1:
soft_limit, _ = _rlimit_nofile()
self.jobs = min(self.jobs, (soft_limit - 5) // 3)
manifest = self.outer_manifest manifest = self.outer_manifest
if not opt.outer_manifest: if not opt.outer_manifest:
manifest = self.manifest manifest = self.manifest
@ -1092,6 +1077,36 @@ later is required to fix a server side protocol bug.
else: else:
print('Skipping update of local manifest project.') print('Skipping update of local manifest project.')
# Now that the manifests are up-to-date, setup the jobs value.
if opt.jobs is None:
# User has not made a choice, so use the manifest settings.
opt.jobs = mp.default.sync_j
if opt.jobs is not None:
# Neither user nor manifest have made a choice.
if opt.jobs_network is None:
opt.jobs_network = opt.jobs
if opt.jobs_checkout is None:
opt.jobs_checkout = opt.jobs
# Setup defaults if jobs==0.
if not opt.jobs:
if not opt.jobs_network:
opt.jobs_network = 1
if not opt.jobs_checkout:
opt.jobs_checkout = DEFAULT_LOCAL_JOBS
opt.jobs = os.cpu_count()
# Try to stay under user rlimit settings.
#
# Since each worker requires at 3 file descriptors to run `git fetch`, use
# that to scale down the number of jobs. Unfortunately there isn't an easy
# way to determine this reliably as systems change, but it was last measured
# by hand in 2011.
soft_limit, _ = _rlimit_nofile()
jobs_soft_limit = max(1, (soft_limit - 5) // 3)
opt.jobs = min(opt.jobs, jobs_soft_limit)
opt.jobs_network = min(opt.jobs_network, jobs_soft_limit)
opt.jobs_checkout = min(opt.jobs_checkout, jobs_soft_limit)
superproject_logging_data = {} superproject_logging_data = {}
self._UpdateProjectsRevisionId(opt, args, superproject_logging_data, self._UpdateProjectsRevisionId(opt, args, superproject_logging_data,
manifest) manifest)