mirror of
https://gerrit.googlesource.com/git-repo
synced 2024-12-21 07:16:21 +00:00
grep: add --jobs support
Use multiprocessing to run in parallel. When operating on multiple projects, this can greatly speed things up. Across 1000 repos, it goes from ~40sec to ~16sec with the default -j8. The output processing does not appear to be a significant bottle neck -- it accounts for <1sec out of the ~16sec runtime. Thus we leave it in the main thread to simplify the code. Change-Id: I750b72c7711b0c5d26e65d480738fbaac3a69971 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297984 Reviewed-by: Chris Mcdonald <cjmcdonald@google.com> Tested-by: Mike Frysinger <vapier@google.com>
This commit is contained in:
parent
bec4fe8aa3
commit
d246d1fee7
138
subcmds/grep.py
138
subcmds/grep.py
@ -12,10 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import functools
|
||||
import multiprocessing
|
||||
import sys
|
||||
|
||||
from color import Coloring
|
||||
from command import PagedCommand
|
||||
from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
|
||||
from error import GitError
|
||||
from git_command import GitCommand
|
||||
|
||||
@ -61,6 +63,7 @@ contain a line that matches both expressions:
|
||||
repo grep --all-match -e NODE -e Unexpected
|
||||
|
||||
"""
|
||||
PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
|
||||
|
||||
@staticmethod
|
||||
def _carry_option(_option, opt_str, value, parser):
|
||||
@ -80,6 +83,7 @@ contain a line that matches both expressions:
|
||||
pt.append(value)
|
||||
|
||||
def _Options(self, p):
|
||||
super()._Options(p)
|
||||
g = p.add_option_group('Sources')
|
||||
g.add_option('--cached',
|
||||
action='callback', callback=self._carry_option,
|
||||
@ -152,6 +156,72 @@ contain a line that matches both expressions:
|
||||
action='callback', callback=self._carry_option,
|
||||
help='Show only file names not containing matching lines')
|
||||
|
||||
def _ExecuteOne(self, cmd_argv, project):
|
||||
"""Process one project."""
|
||||
try:
|
||||
p = GitCommand(project,
|
||||
cmd_argv,
|
||||
bare=False,
|
||||
capture_stdout=True,
|
||||
capture_stderr=True)
|
||||
except GitError as e:
|
||||
return (project, -1, None, str(e))
|
||||
|
||||
return (project, p.Wait(), p.stdout, p.stderr)
|
||||
|
||||
@staticmethod
|
||||
def _ProcessResults(out, full_name, have_rev, results):
|
||||
git_failed = False
|
||||
bad_rev = False
|
||||
have_match = False
|
||||
|
||||
for project, rc, stdout, stderr in results:
|
||||
if rc < 0:
|
||||
git_failed = True
|
||||
out.project('--- project %s ---' % project.relpath)
|
||||
out.nl()
|
||||
out.fail('%s', stderr)
|
||||
out.nl()
|
||||
continue
|
||||
|
||||
if rc:
|
||||
# no results
|
||||
if stderr:
|
||||
if have_rev and 'fatal: ambiguous argument' in stderr:
|
||||
bad_rev = True
|
||||
else:
|
||||
out.project('--- project %s ---' % project.relpath)
|
||||
out.nl()
|
||||
out.fail('%s', stderr.strip())
|
||||
out.nl()
|
||||
continue
|
||||
have_match = True
|
||||
|
||||
# We cut the last element, to avoid a blank line.
|
||||
r = stdout.split('\n')
|
||||
r = r[0:-1]
|
||||
|
||||
if have_rev and full_name:
|
||||
for line in r:
|
||||
rev, line = line.split(':', 1)
|
||||
out.write("%s", rev)
|
||||
out.write(':')
|
||||
out.project(project.relpath)
|
||||
out.write('/')
|
||||
out.write("%s", line)
|
||||
out.nl()
|
||||
elif full_name:
|
||||
for line in r:
|
||||
out.project(project.relpath)
|
||||
out.write('/')
|
||||
out.write("%s", line)
|
||||
out.nl()
|
||||
else:
|
||||
for line in r:
|
||||
print(line)
|
||||
|
||||
return (git_failed, bad_rev, have_match)
|
||||
|
||||
def Execute(self, opt, args):
|
||||
out = GrepColoring(self.manifest.manifestProject.config)
|
||||
|
||||
@ -183,62 +253,18 @@ contain a line that matches both expressions:
|
||||
cmd_argv.extend(opt.revision)
|
||||
cmd_argv.append('--')
|
||||
|
||||
git_failed = False
|
||||
bad_rev = False
|
||||
have_match = False
|
||||
|
||||
for project in projects:
|
||||
try:
|
||||
p = GitCommand(project,
|
||||
cmd_argv,
|
||||
bare=False,
|
||||
capture_stdout=True,
|
||||
capture_stderr=True)
|
||||
except GitError as e:
|
||||
git_failed = True
|
||||
out.project('--- project %s ---' % project.relpath)
|
||||
out.nl()
|
||||
out.fail('%s', str(e))
|
||||
out.nl()
|
||||
continue
|
||||
|
||||
if p.Wait() != 0:
|
||||
# no results
|
||||
#
|
||||
if p.stderr:
|
||||
if have_rev and 'fatal: ambiguous argument' in p.stderr:
|
||||
bad_rev = True
|
||||
process_results = functools.partial(
|
||||
self._ProcessResults, out, full_name, have_rev)
|
||||
# NB: Multiprocessing is heavy, so don't spin it up for one job.
|
||||
if len(projects) == 1 or opt.jobs == 1:
|
||||
git_failed, bad_rev, have_match = process_results(
|
||||
self._ExecuteOne(cmd_argv, x) for x in projects)
|
||||
else:
|
||||
out.project('--- project %s ---' % project.relpath)
|
||||
out.nl()
|
||||
out.fail('%s', p.stderr.strip())
|
||||
out.nl()
|
||||
continue
|
||||
have_match = True
|
||||
|
||||
# We cut the last element, to avoid a blank line.
|
||||
#
|
||||
r = p.stdout.split('\n')
|
||||
r = r[0:-1]
|
||||
|
||||
if have_rev and full_name:
|
||||
for line in r:
|
||||
rev, line = line.split(':', 1)
|
||||
out.write("%s", rev)
|
||||
out.write(':')
|
||||
out.project(project.relpath)
|
||||
out.write('/')
|
||||
out.write("%s", line)
|
||||
out.nl()
|
||||
elif full_name:
|
||||
for line in r:
|
||||
out.project(project.relpath)
|
||||
out.write('/')
|
||||
out.write("%s", line)
|
||||
out.nl()
|
||||
else:
|
||||
for line in r:
|
||||
print(line)
|
||||
with multiprocessing.Pool(opt.jobs) as pool:
|
||||
results = pool.imap(
|
||||
functools.partial(self._ExecuteOne, cmd_argv), projects,
|
||||
chunksize=WORKER_BATCH_SIZE)
|
||||
git_failed, bad_rev, have_match = process_results(results)
|
||||
|
||||
if git_failed:
|
||||
sys.exit(1)
|
||||
|
Loading…
Reference in New Issue
Block a user