From 6093d99d13deada12a2365e81e2bd148dbb423ad Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Wed, 24 Feb 2021 12:17:31 -0500
Subject: [PATCH] checkout: add --jobs support

Use multiprocessing to run in parallel. When operating on multiple
projects, this can speed things up. Across 1000 repos, it goes from
~9sec to ~5sec with the default -j8.

Change-Id: Ida6dd565db78ff7bac0ecb25d2805e8a1bf78048
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297982
Reviewed-by: Chris Mcdonald
Tested-by: Mike Frysinger
---
 subcmds/checkout.py | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/subcmds/checkout.py b/subcmds/checkout.py
index fbb13653..cf54ced7 100644
--- a/subcmds/checkout.py
+++ b/subcmds/checkout.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import functools
+import multiprocessing
 import sys
-from command import Command
+
+from command import Command, DEFAULT_LOCAL_JOBS, WORKER_BATCH_SIZE
 from progress import Progress
 
 
@@ -31,27 +34,41 @@ The command is equivalent to:
 
   repo forall [<project>...] -c git checkout <branchname>
 """
+  PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
 
   def ValidateOptions(self, opt, args):
     if not args:
       self.Usage()
 
+  def _ExecuteOne(self, nb, project):
+    """Checkout one project."""
+    return (project.CheckoutBranch(nb), project)
+
   def Execute(self, opt, args):
     nb = args[0]
     err = []
     success = []
     all_projects = self.GetProjects(args[1:])
 
-    pm = Progress('Checkout %s' % nb, len(all_projects))
-    for project in all_projects:
-      pm.update()
+    def _ProcessResults(results):
+      for status, project in results:
+        if status is not None:
+          if status:
+            success.append(project)
+          else:
+            err.append(project)
+        pm.update()
 
-      status = project.CheckoutBranch(nb)
-      if status is not None:
-        if status:
-          success.append(project)
-        else:
-          err.append(project)
+    pm = Progress('Checkout %s' % nb, len(all_projects))
+    # NB: Multiprocessing is heavy, so don't spin it up for one job.
+    if len(all_projects) == 1 or opt.jobs == 1:
+      _ProcessResults(self._ExecuteOne(nb, x) for x in all_projects)
+    else:
+      with multiprocessing.Pool(opt.jobs) as pool:
+        results = pool.imap_unordered(
+            functools.partial(self._ExecuteOne, nb), all_projects,
+            chunksize=WORKER_BATCH_SIZE)
+        _ProcessResults(results)
     pm.end()
 
     if err:
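
For readers unfamiliar with the pattern, here is a minimal standalone
sketch of the Pool fan-out this change adopts. Everything in it
(_checkout_one, JOBS, BATCH, the fake project list, the 'main' branch
name) is an illustrative stand-in, not repo code; in the patch the real
pieces are opt.jobs, command.WORKER_BATCH_SIZE, and
Project.CheckoutBranch().

# Illustrative sketch only -- not part of the patch. JOBS and BATCH
# stand in for opt.jobs and command.WORKER_BATCH_SIZE.
import functools
import multiprocessing

JOBS = 8
BATCH = 32

def _checkout_one(branch, project):
  """Stand-in for Project.CheckoutBranch(); returns (status, project)."""
  return (True, project)

def main():
  branch = 'main'
  projects = ['project%d' % i for i in range(1000)]
  success = []

  def _process(results):
    # Runs in the parent process, so it can safely mutate |success|.
    for status, project in results:
      if status:
        success.append(project)

  # Mirror the patch: multiprocessing startup is not free, so fall back
  # to a plain generator when there is only one project or one job.
  if len(projects) == 1 or JOBS == 1:
    _process(_checkout_one(branch, p) for p in projects)
  else:
    with multiprocessing.Pool(JOBS) as pool:
      # imap_unordered yields results as workers finish, letting the
      # caller update progress without waiting on slow projects;
      # chunksize batches work items to cut down on IPC round trips.
      _process(pool.imap_unordered(
          functools.partial(_checkout_one, branch), projects,
          chunksize=BATCH))
  print('checked out %d/%d projects' % (len(success), len(projects)))

if __name__ == '__main__':
  main()

functools.partial is used rather than a lambda because Pool workers can
only receive picklable callables, and lambdas do not pickle; a
module-level function partially applied with a string does.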