checkout: add --jobs support

Use multiprocessing to check out projects in parallel.  When operating on
multiple projects, this can speed things up: across 1000 repos, the run
time drops from ~9sec to ~5sec with the default -j8.

Change-Id: Ida6dd565db78ff7bac0ecb25d2805e8a1bf78048
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297982
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
This commit is contained in:
Mike Frysinger 2021-02-24 12:17:31 -05:00
parent ebf04a4404
commit 6093d99d13

View File

@ -12,8 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import functools
import multiprocessing
import sys import sys
from command import Command
from command import Command, DEFAULT_LOCAL_JOBS, WORKER_BATCH_SIZE
from progress import Progress from progress import Progress
@ -31,27 +34,41 @@ The command is equivalent to:
repo forall [<project>...] -c git checkout <branchname> repo forall [<project>...] -c git checkout <branchname>
""" """
PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
def ValidateOptions(self, opt, args): def ValidateOptions(self, opt, args):
if not args: if not args:
self.Usage() self.Usage()
def _ExecuteOne(self, nb, project):
"""Checkout one project."""
return (project.CheckoutBranch(nb), project)
def Execute(self, opt, args): def Execute(self, opt, args):
nb = args[0] nb = args[0]
err = [] err = []
success = [] success = []
all_projects = self.GetProjects(args[1:]) all_projects = self.GetProjects(args[1:])
pm = Progress('Checkout %s' % nb, len(all_projects)) def _ProcessResults(results):
for project in all_projects: for status, project in results:
pm.update()
status = project.CheckoutBranch(nb)
if status is not None: if status is not None:
if status: if status:
success.append(project) success.append(project)
else: else:
err.append(project) err.append(project)
pm.update()
pm = Progress('Checkout %s' % nb, len(all_projects))
# NB: Multiprocessing is heavy, so don't spin it up for one job.
if len(all_projects) == 1 or opt.jobs == 1:
_ProcessResults(self._ExecuteOne(nb, x) for x in all_projects)
else:
with multiprocessing.Pool(opt.jobs) as pool:
results = pool.imap_unordered(
functools.partial(self._ExecuteOne, nb), all_projects,
chunksize=WORKER_BATCH_SIZE)
_ProcessResults(results)
pm.end() pm.end()
if err: if err: