git-repo/subcmds/forall.py
Mike Frysinger 15e807cf3c forall: improve pool logic
Use a pool contextmanager to take care of the messy details like
properly cleaning it up when aborting.

Change-Id: I264ebb591c2e67c9a975b6dcc0f14b29cc66a874
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297243
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
2021-02-22 22:51:46 +00:00

399 lines
12 KiB
Python

# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import multiprocessing
import re
import os
import signal
import sys
import subprocess
from color import Coloring
from command import DEFAULT_LOCAL_JOBS, Command, MirrorSafeCommand, WORKER_BATCH_SIZE
import platform_utils
_CAN_COLOR = [
'branch',
'diff',
'grep',
'log',
]
class ForallColoring(Coloring):
def __init__(self, config):
Coloring.__init__(self, config, 'forall')
self.project = self.printer('project', attr='bold')
class Forall(Command, MirrorSafeCommand):
common = False
helpSummary = "Run a shell command in each project"
helpUsage = """
%prog [<project>...] -c <command> [<arg>...]
%prog -r str1 [str2] ... -c <command> [<arg>...]"
"""
helpDescription = """
Executes the same shell command in each project.
The -r option allows running the command only on projects matching
regex or wildcard expression.
# Output Formatting
The -p option causes '%prog' to bind pipes to the command's stdin,
stdout and stderr streams, and pipe all output into a continuous
stream that is displayed in a single pager session. Project headings
are inserted before the output of each command is displayed. If the
command produces no output in a project, no heading is displayed.
The formatting convention used by -p is very suitable for some
types of searching, e.g. `repo forall -p -c git log -SFoo` will
print all commits that add or remove references to Foo.
The -v option causes '%prog' to display stderr messages if a
command produces output only on stderr. Normally the -p option
causes command output to be suppressed until the command produces
at least one byte of output on stdout.
# Environment
pwd is the project's working directory. If the current client is
a mirror client, then pwd is the Git repository.
REPO_PROJECT is set to the unique name of the project.
REPO_PATH is the path relative the the root of the client.
REPO_REMOTE is the name of the remote system from the manifest.
REPO_LREV is the name of the revision from the manifest, translated
to a local tracking branch. If you need to pass the manifest
revision to a locally executed git command, use REPO_LREV.
REPO_RREV is the name of the revision from the manifest, exactly
as written in the manifest.
REPO_COUNT is the total number of projects being iterated.
REPO_I is the current (1-based) iteration count. Can be used in
conjunction with REPO_COUNT to add a simple progress indicator to your
command.
REPO__* are any extra environment variables, specified by the
"annotation" element under any project element. This can be useful
for differentiating trees based on user-specific criteria, or simply
annotating tree details.
shell positional arguments ($1, $2, .., $#) are set to any arguments
following <command>.
Example: to list projects:
%prog -c 'echo $REPO_PROJECT'
Notice that $REPO_PROJECT is quoted to ensure it is expanded in
the context of running <command> instead of in the calling shell.
Unless -p is used, stdin, stdout, stderr are inherited from the
terminal and are not redirected.
If -e is used, when a command exits unsuccessfully, '%prog' will abort
without iterating through the remaining projects.
"""
PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
def _Options(self, p):
super()._Options(p)
def cmd(option, opt_str, value, parser):
setattr(parser.values, option.dest, list(parser.rargs))
while parser.rargs:
del parser.rargs[0]
p.add_option('-r', '--regex',
dest='regex', action='store_true',
help="Execute the command only on projects matching regex or wildcard expression")
p.add_option('-i', '--inverse-regex',
dest='inverse_regex', action='store_true',
help="Execute the command only on projects not matching regex or "
"wildcard expression")
p.add_option('-g', '--groups',
dest='groups',
help="Execute the command only on projects matching the specified groups")
p.add_option('-c', '--command',
help='Command (and arguments) to execute',
dest='command',
action='callback',
callback=cmd)
p.add_option('-e', '--abort-on-errors',
dest='abort_on_errors', action='store_true',
help='Abort if a command exits unsuccessfully')
p.add_option('--ignore-missing', action='store_true',
help='Silently skip & do not exit non-zero due missing '
'checkouts')
g = p.add_option_group('Output')
g.add_option('-p',
dest='project_header', action='store_true',
help='Show project headers before output')
g.add_option('-v', '--verbose',
dest='verbose', action='store_true',
help='Show command error messages')
def WantPager(self, opt):
return opt.project_header and opt.jobs == 1
def _SerializeProject(self, project):
""" Serialize a project._GitGetByExec instance.
project._GitGetByExec is not pickle-able. Instead of trying to pass it
around between processes, make a dict ourselves containing only the
attributes that we need.
"""
if not self.manifest.IsMirror:
lrev = project.GetRevisionId()
else:
lrev = None
return {
'name': project.name,
'relpath': project.relpath,
'remote_name': project.remote.name,
'lrev': lrev,
'rrev': project.revisionExpr,
'annotations': dict((a.name, a.value) for a in project.annotations),
'gitdir': project.gitdir,
'worktree': project.worktree,
'upstream': project.upstream,
'dest_branch': project.dest_branch,
}
def ValidateOptions(self, opt, args):
if not opt.command:
self.Usage()
def Execute(self, opt, args):
cmd = [opt.command[0]]
shell = True
if re.compile(r'^[a-z0-9A-Z_/\.-]+$').match(cmd[0]):
shell = False
if shell:
cmd.append(cmd[0])
cmd.extend(opt.command[1:])
if opt.project_header \
and not shell \
and cmd[0] == 'git':
# If this is a direct git command that can enable colorized
# output and the user prefers coloring, add --color into the
# command line because we are going to wrap the command into
# a pipe and git won't know coloring should activate.
#
for cn in cmd[1:]:
if not cn.startswith('-'):
break
else:
cn = None
if cn and cn in _CAN_COLOR:
class ColorCmd(Coloring):
def __init__(self, config, cmd):
Coloring.__init__(self, config, cmd)
if ColorCmd(self.manifest.manifestProject.config, cn).is_on:
cmd.insert(cmd.index(cn) + 1, '--color')
mirror = self.manifest.IsMirror
rc = 0
smart_sync_manifest_name = "smart_sync_override.xml"
smart_sync_manifest_path = os.path.join(
self.manifest.manifestProject.worktree, smart_sync_manifest_name)
if os.path.isfile(smart_sync_manifest_path):
self.manifest.Override(smart_sync_manifest_path)
if opt.regex:
projects = self.FindProjects(args)
elif opt.inverse_regex:
projects = self.FindProjects(args, inverse=True)
else:
projects = self.GetProjects(args, groups=opt.groups)
os.environ['REPO_COUNT'] = str(len(projects))
try:
config = self.manifest.manifestProject.config
with multiprocessing.Pool(opt.jobs, InitWorker) as pool:
results_it = pool.imap(
DoWorkWrapper,
self.ProjectArgs(projects, mirror, opt, cmd, shell, config),
chunksize=WORKER_BATCH_SIZE)
for r in results_it:
rc = rc or r
if r != 0 and opt.abort_on_errors:
raise Exception('Aborting due to previous error')
except (KeyboardInterrupt, WorkerKeyboardInterrupt):
# Catch KeyboardInterrupt raised inside and outside of workers
rc = rc or errno.EINTR
except Exception as e:
# Catch any other exceptions raised
print('Got an error, terminating the pool: %s: %s' %
(type(e).__name__, e),
file=sys.stderr)
rc = rc or getattr(e, 'errno', 1)
if rc != 0:
sys.exit(rc)
def ProjectArgs(self, projects, mirror, opt, cmd, shell, config):
for cnt, p in enumerate(projects):
try:
project = self._SerializeProject(p)
except Exception as e:
print('Project list error on project %s: %s: %s' %
(p.name, type(e).__name__, e),
file=sys.stderr)
return
except KeyboardInterrupt:
print('Project list interrupted',
file=sys.stderr)
return
yield [mirror, opt, cmd, shell, cnt, config, project]
class WorkerKeyboardInterrupt(Exception):
""" Keyboard interrupt exception for worker processes. """
def InitWorker():
signal.signal(signal.SIGINT, signal.SIG_IGN)
def DoWorkWrapper(args):
""" A wrapper around the DoWork() method.
Catch the KeyboardInterrupt exceptions here and re-raise them as a different,
``Exception``-based exception to stop it flooding the console with stacktraces
and making the parent hang indefinitely.
"""
project = args.pop()
try:
return DoWork(project, *args)
except KeyboardInterrupt:
print('%s: Worker interrupted' % project['name'])
raise WorkerKeyboardInterrupt()
def DoWork(project, mirror, opt, cmd, shell, cnt, config):
env = os.environ.copy()
def setenv(name, val):
if val is None:
val = ''
env[name] = val
setenv('REPO_PROJECT', project['name'])
setenv('REPO_PATH', project['relpath'])
setenv('REPO_REMOTE', project['remote_name'])
setenv('REPO_LREV', project['lrev'])
setenv('REPO_RREV', project['rrev'])
setenv('REPO_UPSTREAM', project['upstream'])
setenv('REPO_DEST_BRANCH', project['dest_branch'])
setenv('REPO_I', str(cnt + 1))
for name in project['annotations']:
setenv("REPO__%s" % (name), project['annotations'][name])
if mirror:
setenv('GIT_DIR', project['gitdir'])
cwd = project['gitdir']
else:
cwd = project['worktree']
if not os.path.exists(cwd):
# Allow the user to silently ignore missing checkouts so they can run on
# partial checkouts (good for infra recovery tools).
if opt.ignore_missing:
return 0
if ((opt.project_header and opt.verbose)
or not opt.project_header):
print('skipping %s/' % project['relpath'], file=sys.stderr)
return 1
if opt.project_header:
stdin = subprocess.PIPE
stdout = subprocess.PIPE
stderr = subprocess.PIPE
else:
stdin = None
stdout = None
stderr = None
p = subprocess.Popen(cmd,
cwd=cwd,
shell=shell,
env=env,
stdin=stdin,
stdout=stdout,
stderr=stderr)
if opt.project_header:
out = ForallColoring(config)
out.redirect(sys.stdout)
empty = True
errbuf = ''
p.stdin.close()
s_in = platform_utils.FileDescriptorStreams.create()
s_in.add(p.stdout, sys.stdout, 'stdout')
s_in.add(p.stderr, sys.stderr, 'stderr')
while not s_in.is_done:
in_ready = s_in.select()
for s in in_ready:
buf = s.read().decode()
if not buf:
s_in.remove(s)
s.close()
continue
if not opt.verbose:
if s.std_name == 'stderr':
errbuf += buf
continue
if empty and out:
if not cnt == 0:
out.nl()
if mirror:
project_header_path = project['name']
else:
project_header_path = project['relpath']
out.project('project %s/', project_header_path)
out.nl()
out.flush()
if errbuf:
sys.stderr.write(errbuf)
sys.stderr.flush()
errbuf = ''
empty = False
s.dest.write(buf)
s.dest.flush()
r = p.wait()
return r