git-repo/subcmds/forall.py
Josip Sokcevic e219c78fe5 forall: Fix returning results early
rc should be returned only after all results are processed.

R=jojwang@google.com

Bug: b/376454189
Change-Id: I8200b9954240dd3e8e9f2ab82494779a3cb38627
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/440901
Tested-by: Josip Sokcevic <sokcevic@google.com>
Commit-Queue: Josip Sokcevic <sokcevic@google.com>
Reviewed-by: Joanna Wang <jojwang@google.com>
2024-10-30 16:11:04 +00:00

422 lines
14 KiB
Python

# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import functools
import io
import os
import re
import signal
import subprocess
import sys
from color import Coloring
from command import Command
from command import DEFAULT_LOCAL_JOBS
from command import MirrorSafeCommand
from error import ManifestInvalidRevisionError
from repo_logging import RepoLogger
logger = RepoLogger(__file__)
_CAN_COLOR = [
"branch",
"diff",
"grep",
"log",
]
class ForallColoring(Coloring):
def __init__(self, config):
Coloring.__init__(self, config, "forall")
self.project = self.printer("project", attr="bold")
class Forall(Command, MirrorSafeCommand):
COMMON = False
helpSummary = "Run a shell command in each project"
helpUsage = """
%prog [<project>...] -c <command> [<arg>...]
%prog -r str1 [str2] ... -c <command> [<arg>...]
"""
helpDescription = """
Executes the same shell command in each project.
The -r option allows running the command only on projects matching
regex or wildcard expression.
By default, projects are processed non-interactively in parallel. If you want
to run interactive commands, make sure to pass --interactive to force --jobs 1.
While the processing order of projects is not guaranteed, the order of project
output is stable.
# Output Formatting
The -p option causes '%prog' to bind pipes to the command's stdin,
stdout and stderr streams, and pipe all output into a continuous
stream that is displayed in a single pager session. Project headings
are inserted before the output of each command is displayed. If the
command produces no output in a project, no heading is displayed.
The formatting convention used by -p is very suitable for some
types of searching, e.g. `repo forall -p -c git log -SFoo` will
print all commits that add or remove references to Foo.
The -v option causes '%prog' to display stderr messages if a
command produces output only on stderr. Normally the -p option
causes command output to be suppressed until the command produces
at least one byte of output on stdout.
# Environment
pwd is the project's working directory. If the current client is
a mirror client, then pwd is the Git repository.
REPO_PROJECT is set to the unique name of the project.
REPO_PATH is the path relative the the root of the client.
REPO_OUTERPATH is the path of the sub manifest's root relative to the root of
the client.
REPO_INNERPATH is the path relative to the root of the sub manifest.
REPO_REMOTE is the name of the remote system from the manifest.
REPO_LREV is the name of the revision from the manifest, translated
to a local tracking branch. If you need to pass the manifest
revision to a locally executed git command, use REPO_LREV.
REPO_RREV is the name of the revision from the manifest, exactly
as written in the manifest.
REPO_COUNT is the total number of projects being iterated.
REPO_I is the current (1-based) iteration count. Can be used in
conjunction with REPO_COUNT to add a simple progress indicator to your
command.
REPO__* are any extra environment variables, specified by the
"annotation" element under any project element. This can be useful
for differentiating trees based on user-specific criteria, or simply
annotating tree details.
shell positional arguments ($1, $2, .., $#) are set to any arguments
following <command>.
Example: to list projects:
%prog -c 'echo $REPO_PROJECT'
Notice that $REPO_PROJECT is quoted to ensure it is expanded in
the context of running <command> instead of in the calling shell.
Unless -p is used, stdin, stdout, stderr are inherited from the
terminal and are not redirected.
If -e is used, when a command exits unsuccessfully, '%prog' will abort
without iterating through the remaining projects.
"""
PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
@staticmethod
def _cmd_option(option, _opt_str, _value, parser):
setattr(parser.values, option.dest, list(parser.rargs))
while parser.rargs:
del parser.rargs[0]
def _Options(self, p):
p.add_option(
"-r",
"--regex",
dest="regex",
action="store_true",
help="execute the command only on projects matching regex or "
"wildcard expression",
)
p.add_option(
"-i",
"--inverse-regex",
dest="inverse_regex",
action="store_true",
help="execute the command only on projects not matching regex or "
"wildcard expression",
)
p.add_option(
"-g",
"--groups",
dest="groups",
help="execute the command only on projects matching the specified "
"groups",
)
p.add_option(
"-c",
"--command",
help="command (and arguments) to execute",
dest="command",
action="callback",
callback=self._cmd_option,
)
p.add_option(
"-e",
"--abort-on-errors",
dest="abort_on_errors",
action="store_true",
help="abort if a command exits unsuccessfully",
)
p.add_option(
"--ignore-missing",
action="store_true",
help="silently skip & do not exit non-zero due missing "
"checkouts",
)
g = p.get_option_group("--quiet")
g.add_option(
"-p",
dest="project_header",
action="store_true",
help="show project headers before output",
)
p.add_option(
"--interactive", action="store_true", help="force interactive usage"
)
def WantPager(self, opt):
return opt.project_header and opt.jobs == 1
def ValidateOptions(self, opt, args):
if not opt.command:
self.Usage()
def Execute(self, opt, args):
cmd = [opt.command[0]]
all_trees = not opt.this_manifest_only
shell = True
if re.compile(r"^[a-z0-9A-Z_/\.-]+$").match(cmd[0]):
shell = False
if shell:
cmd.append(cmd[0])
cmd.extend(opt.command[1:])
# Historically, forall operated interactively, and in serial. If the
# user has selected 1 job, then default to interacive mode.
if opt.jobs == 1:
opt.interactive = True
if opt.project_header and not shell and cmd[0] == "git":
# If this is a direct git command that can enable colorized
# output and the user prefers coloring, add --color into the
# command line because we are going to wrap the command into
# a pipe and git won't know coloring should activate.
#
for cn in cmd[1:]:
if not cn.startswith("-"):
break
else:
cn = None
if cn and cn in _CAN_COLOR:
class ColorCmd(Coloring):
def __init__(self, config, cmd):
Coloring.__init__(self, config, cmd)
if ColorCmd(self.manifest.manifestProject.config, cn).is_on:
cmd.insert(cmd.index(cn) + 1, "--color")
mirror = self.manifest.IsMirror
smart_sync_manifest_name = "smart_sync_override.xml"
smart_sync_manifest_path = os.path.join(
self.manifest.manifestProject.worktree, smart_sync_manifest_name
)
if os.path.isfile(smart_sync_manifest_path):
self.manifest.Override(smart_sync_manifest_path)
if opt.regex:
projects = self.FindProjects(args, all_manifests=all_trees)
elif opt.inverse_regex:
projects = self.FindProjects(
args, inverse=True, all_manifests=all_trees
)
else:
projects = self.GetProjects(
args, groups=opt.groups, all_manifests=all_trees
)
os.environ["REPO_COUNT"] = str(len(projects))
def _ProcessResults(_pool, _output, results):
rc = 0
first = True
for r, output in results:
if output:
if first:
first = False
elif opt.project_header:
print()
# To simplify the DoWorkWrapper, take care of automatic
# newlines.
end = "\n"
if output[-1] == "\n":
end = ""
print(output, end=end)
rc = rc or r
if r != 0 and opt.abort_on_errors:
raise Exception("Aborting due to previous error")
return rc
try:
config = self.manifest.manifestProject.config
with self.ParallelContext():
self.get_parallel_context()["projects"] = projects
rc = self.ExecuteInParallel(
opt.jobs,
functools.partial(
self.DoWorkWrapper, mirror, opt, cmd, shell, config
),
range(len(projects)),
callback=_ProcessResults,
ordered=True,
initializer=self.InitWorker,
chunksize=1,
)
except (KeyboardInterrupt, WorkerKeyboardInterrupt):
# Catch KeyboardInterrupt raised inside and outside of workers
rc = errno.EINTR
except Exception as e:
# Catch any other exceptions raised
logger.error(
"forall: unhandled error, terminating the pool: %s: %s",
type(e).__name__,
e,
)
rc = getattr(e, "errno", 1)
if rc != 0:
sys.exit(rc)
@classmethod
def InitWorker(cls):
signal.signal(signal.SIGINT, signal.SIG_IGN)
@classmethod
def DoWorkWrapper(cls, mirror, opt, cmd, shell, config, project_idx):
"""A wrapper around the DoWork() method.
Catch the KeyboardInterrupt exceptions here and re-raise them as a
different, ``Exception``-based exception to stop it flooding the console
with stacktraces and making the parent hang indefinitely.
"""
project = cls.get_parallel_context()["projects"][project_idx]
try:
return DoWork(project, mirror, opt, cmd, shell, project_idx, config)
except KeyboardInterrupt:
print("%s: Worker interrupted" % project.name)
raise WorkerKeyboardInterrupt()
class WorkerKeyboardInterrupt(Exception):
"""Keyboard interrupt exception for worker processes."""
def DoWork(project, mirror, opt, cmd, shell, cnt, config):
env = os.environ.copy()
def setenv(name, val):
if val is None:
val = ""
env[name] = val
setenv("REPO_PROJECT", project.name)
setenv("REPO_OUTERPATH", project.manifest.path_prefix)
setenv("REPO_INNERPATH", project.relpath)
setenv("REPO_PATH", project.RelPath(local=opt.this_manifest_only))
setenv("REPO_REMOTE", project.remote.name)
try:
# If we aren't in a fully synced state and we don't have the ref the
# manifest wants, then this will fail. Ignore it for the purposes of
# this code.
lrev = "" if mirror else project.GetRevisionId()
except ManifestInvalidRevisionError:
lrev = ""
setenv("REPO_LREV", lrev)
setenv("REPO_RREV", project.revisionExpr)
setenv("REPO_UPSTREAM", project.upstream)
setenv("REPO_DEST_BRANCH", project.dest_branch)
setenv("REPO_I", str(cnt + 1))
for annotation in project.annotations:
setenv("REPO__%s" % (annotation.name), annotation.value)
if mirror:
setenv("GIT_DIR", project.gitdir)
cwd = project.gitdir
else:
cwd = project.worktree
if not os.path.exists(cwd):
# Allow the user to silently ignore missing checkouts so they can run on
# partial checkouts (good for infra recovery tools).
if opt.ignore_missing:
return (0, "")
output = ""
if (opt.project_header and opt.verbose) or not opt.project_header:
output = "skipping %s/" % project.RelPath(
local=opt.this_manifest_only
)
return (1, output)
if opt.verbose:
stderr = subprocess.STDOUT
else:
stderr = subprocess.DEVNULL
stdin = None if opt.interactive else subprocess.DEVNULL
result = subprocess.run(
cmd,
cwd=cwd,
shell=shell,
env=env,
check=False,
encoding="utf-8",
errors="replace",
stdin=stdin,
stdout=subprocess.PIPE,
stderr=stderr,
)
output = result.stdout
if opt.project_header:
if output:
buf = io.StringIO()
out = ForallColoring(config)
out.redirect(buf)
if mirror:
project_header_path = project.name
else:
project_header_path = project.RelPath(
local=opt.this_manifest_only
)
out.project("project %s/" % project_header_path)
out.nl()
buf.write(output)
output = buf.getvalue()
return (result.returncode, output)