mirror of
https://gerrit.googlesource.com/git-repo
synced 2024-12-21 07:16:21 +00:00
grep: add --jobs support
Use multiprocessing to run in parallel. When operating on multiple projects, this can greatly speed things up. Across 1000 repos, it goes from ~40sec to ~16sec with the default -j8. The output processing does not appear to be a significant bottle neck -- it accounts for <1sec out of the ~16sec runtime. Thus we leave it in the main thread to simplify the code. Change-Id: I750b72c7711b0c5d26e65d480738fbaac3a69971 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297984 Reviewed-by: Chris Mcdonald <cjmcdonald@google.com> Tested-by: Mike Frysinger <vapier@google.com>
This commit is contained in:
parent
bec4fe8aa3
commit
d246d1fee7
138
subcmds/grep.py
138
subcmds/grep.py
@ -12,10 +12,12 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import functools
|
||||||
|
import multiprocessing
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from color import Coloring
|
from color import Coloring
|
||||||
from command import PagedCommand
|
from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
|
||||||
from error import GitError
|
from error import GitError
|
||||||
from git_command import GitCommand
|
from git_command import GitCommand
|
||||||
|
|
||||||
@ -61,6 +63,7 @@ contain a line that matches both expressions:
|
|||||||
repo grep --all-match -e NODE -e Unexpected
|
repo grep --all-match -e NODE -e Unexpected
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _carry_option(_option, opt_str, value, parser):
|
def _carry_option(_option, opt_str, value, parser):
|
||||||
@ -80,6 +83,7 @@ contain a line that matches both expressions:
|
|||||||
pt.append(value)
|
pt.append(value)
|
||||||
|
|
||||||
def _Options(self, p):
|
def _Options(self, p):
|
||||||
|
super()._Options(p)
|
||||||
g = p.add_option_group('Sources')
|
g = p.add_option_group('Sources')
|
||||||
g.add_option('--cached',
|
g.add_option('--cached',
|
||||||
action='callback', callback=self._carry_option,
|
action='callback', callback=self._carry_option,
|
||||||
@ -152,6 +156,72 @@ contain a line that matches both expressions:
|
|||||||
action='callback', callback=self._carry_option,
|
action='callback', callback=self._carry_option,
|
||||||
help='Show only file names not containing matching lines')
|
help='Show only file names not containing matching lines')
|
||||||
|
|
||||||
|
def _ExecuteOne(self, cmd_argv, project):
|
||||||
|
"""Process one project."""
|
||||||
|
try:
|
||||||
|
p = GitCommand(project,
|
||||||
|
cmd_argv,
|
||||||
|
bare=False,
|
||||||
|
capture_stdout=True,
|
||||||
|
capture_stderr=True)
|
||||||
|
except GitError as e:
|
||||||
|
return (project, -1, None, str(e))
|
||||||
|
|
||||||
|
return (project, p.Wait(), p.stdout, p.stderr)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _ProcessResults(out, full_name, have_rev, results):
|
||||||
|
git_failed = False
|
||||||
|
bad_rev = False
|
||||||
|
have_match = False
|
||||||
|
|
||||||
|
for project, rc, stdout, stderr in results:
|
||||||
|
if rc < 0:
|
||||||
|
git_failed = True
|
||||||
|
out.project('--- project %s ---' % project.relpath)
|
||||||
|
out.nl()
|
||||||
|
out.fail('%s', stderr)
|
||||||
|
out.nl()
|
||||||
|
continue
|
||||||
|
|
||||||
|
if rc:
|
||||||
|
# no results
|
||||||
|
if stderr:
|
||||||
|
if have_rev and 'fatal: ambiguous argument' in stderr:
|
||||||
|
bad_rev = True
|
||||||
|
else:
|
||||||
|
out.project('--- project %s ---' % project.relpath)
|
||||||
|
out.nl()
|
||||||
|
out.fail('%s', stderr.strip())
|
||||||
|
out.nl()
|
||||||
|
continue
|
||||||
|
have_match = True
|
||||||
|
|
||||||
|
# We cut the last element, to avoid a blank line.
|
||||||
|
r = stdout.split('\n')
|
||||||
|
r = r[0:-1]
|
||||||
|
|
||||||
|
if have_rev and full_name:
|
||||||
|
for line in r:
|
||||||
|
rev, line = line.split(':', 1)
|
||||||
|
out.write("%s", rev)
|
||||||
|
out.write(':')
|
||||||
|
out.project(project.relpath)
|
||||||
|
out.write('/')
|
||||||
|
out.write("%s", line)
|
||||||
|
out.nl()
|
||||||
|
elif full_name:
|
||||||
|
for line in r:
|
||||||
|
out.project(project.relpath)
|
||||||
|
out.write('/')
|
||||||
|
out.write("%s", line)
|
||||||
|
out.nl()
|
||||||
|
else:
|
||||||
|
for line in r:
|
||||||
|
print(line)
|
||||||
|
|
||||||
|
return (git_failed, bad_rev, have_match)
|
||||||
|
|
||||||
def Execute(self, opt, args):
|
def Execute(self, opt, args):
|
||||||
out = GrepColoring(self.manifest.manifestProject.config)
|
out = GrepColoring(self.manifest.manifestProject.config)
|
||||||
|
|
||||||
@ -183,62 +253,18 @@ contain a line that matches both expressions:
|
|||||||
cmd_argv.extend(opt.revision)
|
cmd_argv.extend(opt.revision)
|
||||||
cmd_argv.append('--')
|
cmd_argv.append('--')
|
||||||
|
|
||||||
git_failed = False
|
process_results = functools.partial(
|
||||||
bad_rev = False
|
self._ProcessResults, out, full_name, have_rev)
|
||||||
have_match = False
|
# NB: Multiprocessing is heavy, so don't spin it up for one job.
|
||||||
|
if len(projects) == 1 or opt.jobs == 1:
|
||||||
for project in projects:
|
git_failed, bad_rev, have_match = process_results(
|
||||||
try:
|
self._ExecuteOne(cmd_argv, x) for x in projects)
|
||||||
p = GitCommand(project,
|
|
||||||
cmd_argv,
|
|
||||||
bare=False,
|
|
||||||
capture_stdout=True,
|
|
||||||
capture_stderr=True)
|
|
||||||
except GitError as e:
|
|
||||||
git_failed = True
|
|
||||||
out.project('--- project %s ---' % project.relpath)
|
|
||||||
out.nl()
|
|
||||||
out.fail('%s', str(e))
|
|
||||||
out.nl()
|
|
||||||
continue
|
|
||||||
|
|
||||||
if p.Wait() != 0:
|
|
||||||
# no results
|
|
||||||
#
|
|
||||||
if p.stderr:
|
|
||||||
if have_rev and 'fatal: ambiguous argument' in p.stderr:
|
|
||||||
bad_rev = True
|
|
||||||
else:
|
else:
|
||||||
out.project('--- project %s ---' % project.relpath)
|
with multiprocessing.Pool(opt.jobs) as pool:
|
||||||
out.nl()
|
results = pool.imap(
|
||||||
out.fail('%s', p.stderr.strip())
|
functools.partial(self._ExecuteOne, cmd_argv), projects,
|
||||||
out.nl()
|
chunksize=WORKER_BATCH_SIZE)
|
||||||
continue
|
git_failed, bad_rev, have_match = process_results(results)
|
||||||
have_match = True
|
|
||||||
|
|
||||||
# We cut the last element, to avoid a blank line.
|
|
||||||
#
|
|
||||||
r = p.stdout.split('\n')
|
|
||||||
r = r[0:-1]
|
|
||||||
|
|
||||||
if have_rev and full_name:
|
|
||||||
for line in r:
|
|
||||||
rev, line = line.split(':', 1)
|
|
||||||
out.write("%s", rev)
|
|
||||||
out.write(':')
|
|
||||||
out.project(project.relpath)
|
|
||||||
out.write('/')
|
|
||||||
out.write("%s", line)
|
|
||||||
out.nl()
|
|
||||||
elif full_name:
|
|
||||||
for line in r:
|
|
||||||
out.project(project.relpath)
|
|
||||||
out.write('/')
|
|
||||||
out.write("%s", line)
|
|
||||||
out.nl()
|
|
||||||
else:
|
|
||||||
for line in r:
|
|
||||||
print(line)
|
|
||||||
|
|
||||||
if git_failed:
|
if git_failed:
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
Loading…
Reference in New Issue
Block a user