status: Use multiprocessing for repo status -j<num> instead of threading

This change speeds up the command by parallelizing it with processes
instead of threads.  Parallelizing with threads doesn't work well:
increasing the number of jobs (to roughly 8 or more threads) didn't improve
the speed, possibly because of Python's global interpreter lock (GIL).

Bug: https://crbug.com/gerrit/12389
Change-Id: Icbe5df8ba037dd91422b96f4e43708068d7be924
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/279936
Tested-by: Kimiyuki Onaka <kimiyuki@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
This commit is contained in:
Kimiyuki Onaka 2020-08-28 10:05:27 +09:00 committed by Mike Frysinger
parent 4e1fc1013c
commit 0501b29e7a
2 changed files with 22 additions and 33 deletions

View File

@ -3208,6 +3208,13 @@ class Project(object):
self._bare = bare self._bare = bare
self._gitdir = gitdir self._gitdir = gitdir
# __getstate__ and __setstate__ are required for pickling because __getattr__ exists.
def __getstate__(self):
return (self._project, self._bare, self._gitdir)
def __setstate__(self, state):
self._project, self._bare, self._gitdir = state
def LsOthers(self): def LsOthers(self):
p = GitCommand(self._project, p = GitCommand(self._project,
['ls-files', ['ls-files',

View File

@ -16,17 +16,13 @@
from __future__ import print_function from __future__ import print_function
import functools
import glob import glob
import itertools import multiprocessing
import os import os
from command import PagedCommand from command import PagedCommand
try:
import threading as _threading
except ImportError:
import dummy_threading as _threading
from color import Coloring from color import Coloring
import platform_utils import platform_utils
@ -95,25 +91,20 @@ the following meanings:
p.add_option('-q', '--quiet', action='store_true', p.add_option('-q', '--quiet', action='store_true',
help="only print the name of modified projects") help="only print the name of modified projects")
def _StatusHelper(self, project, clean_counter, sem, quiet): def _StatusHelper(self, quiet, project):
"""Obtains the status for a specific project. """Obtains the status for a specific project.
Obtains the status for a project, redirecting the output to Obtains the status for a project, redirecting the output to
the specified object. It will release the semaphore the specified object.
when done.
Args: Args:
quiet: Where to output the status.
project: Project to get status of. project: Project to get status of.
clean_counter: Counter for clean projects.
sem: Semaphore, will call release() when complete. Returns:
output: Where to output the status. The status of the project.
""" """
try: return project.PrintWorkTreeStatus(quiet=quiet)
state = project.PrintWorkTreeStatus(quiet=quiet)
if state == 'CLEAN':
next(clean_counter)
finally:
sem.release()
def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring): def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
"""find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'""" """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
@ -133,27 +124,18 @@ the following meanings:
def Execute(self, opt, args): def Execute(self, opt, args):
all_projects = self.GetProjects(args) all_projects = self.GetProjects(args)
counter = itertools.count() counter = 0
if opt.jobs == 1: if opt.jobs == 1:
for project in all_projects: for project in all_projects:
state = project.PrintWorkTreeStatus(quiet=opt.quiet) state = project.PrintWorkTreeStatus(quiet=opt.quiet)
if state == 'CLEAN': if state == 'CLEAN':
next(counter) counter += 1
else: else:
sem = _threading.Semaphore(opt.jobs) with multiprocessing.Pool(opt.jobs) as pool:
threads = [] states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects)
for project in all_projects: counter += states.count('CLEAN')
sem.acquire() if not opt.quiet and len(all_projects) == counter:
t = _threading.Thread(target=self._StatusHelper,
args=(project, counter, sem, opt.quiet))
threads.append(t)
t.daemon = True
t.start()
for t in threads:
t.join()
if not opt.quiet and len(all_projects) == next(counter):
print('nothing to commit (working directory clean)') print('nothing to commit (working directory clean)')
if opt.orphans: if opt.orphans: