git-repo/subcmds/status.py
Kimiyuki Onaka 0501b29e7a status: Use multiprocessing for repo status -j<num> instead of threading
This change increases the speed of the command with parallelization with
processes.  The parallelization with threads doesn't work well, and
increasing the number of jobs to many (8 threads ~) didn't increase the speed.
Possibly, the global interpreter lock of Python affects.

Bug: https://crbug.com/gerrit/12389
Change-Id: Icbe5df8ba037dd91422b96f4e43708068d7be924
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/279936
Tested-by: Kimiyuki Onaka <kimiyuki@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
2020-09-09 03:52:24 +00:00

180 lines
6.2 KiB
Python

# -*- coding:utf-8 -*-
#
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import functools
import glob
import multiprocessing
import os
from command import PagedCommand
from color import Coloring
import platform_utils
class Status(PagedCommand):
common = True
helpSummary = "Show the working tree status"
helpUsage = """
%prog [<project>...]
"""
helpDescription = """
'%prog' compares the working tree to the staging area (aka index),
and the most recent commit on this branch (HEAD), in each project
specified. A summary is displayed, one line per file where there
is a difference between these three states.
The -j/--jobs option can be used to run multiple status queries
in parallel.
The -o/--orphans option can be used to show objects that are in
the working directory, but not associated with a repo project.
This includes unmanaged top-level files and directories, but also
includes deeper items. For example, if dir/subdir/proj1 and
dir/subdir/proj2 are repo projects, dir/subdir/proj3 will be shown
if it is not known to repo.
# Status Display
The status display is organized into three columns of information,
for example if the file 'subcmds/status.py' is modified in the
project 'repo' on branch 'devwork':
project repo/ branch devwork
-m subcmds/status.py
The first column explains how the staging area (index) differs from
the last commit (HEAD). Its values are always displayed in upper
case and have the following meanings:
-: no difference
A: added (not in HEAD, in index )
M: modified ( in HEAD, in index, different content )
D: deleted ( in HEAD, not in index )
R: renamed (not in HEAD, in index, path changed )
C: copied (not in HEAD, in index, copied from another)
T: mode changed ( in HEAD, in index, same content )
U: unmerged; conflict resolution required
The second column explains how the working directory differs from
the index. Its values are always displayed in lower case and have
the following meanings:
-: new / unknown (not in index, in work tree )
m: modified ( in index, in work tree, modified )
d: deleted ( in index, not in work tree )
"""
def _Options(self, p):
p.add_option('-j', '--jobs',
dest='jobs', action='store', type='int', default=2,
help="number of projects to check simultaneously")
p.add_option('-o', '--orphans',
dest='orphans', action='store_true',
help="include objects in working directory outside of repo projects")
p.add_option('-q', '--quiet', action='store_true',
help="only print the name of modified projects")
def _StatusHelper(self, quiet, project):
"""Obtains the status for a specific project.
Obtains the status for a project, redirecting the output to
the specified object.
Args:
quiet: Where to output the status.
project: Project to get status of.
Returns:
The status of the project.
"""
return project.PrintWorkTreeStatus(quiet=quiet)
def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
"""find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
status_header = ' --\t'
for item in dirs:
if not platform_utils.isdir(item):
outstring.append(''.join([status_header, item]))
continue
if item in proj_dirs:
continue
if item in proj_dirs_parents:
self._FindOrphans(glob.glob('%s/.*' % item) +
glob.glob('%s/*' % item),
proj_dirs, proj_dirs_parents, outstring)
continue
outstring.append(''.join([status_header, item, '/']))
def Execute(self, opt, args):
all_projects = self.GetProjects(args)
counter = 0
if opt.jobs == 1:
for project in all_projects:
state = project.PrintWorkTreeStatus(quiet=opt.quiet)
if state == 'CLEAN':
counter += 1
else:
with multiprocessing.Pool(opt.jobs) as pool:
states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects)
counter += states.count('CLEAN')
if not opt.quiet and len(all_projects) == counter:
print('nothing to commit (working directory clean)')
if opt.orphans:
proj_dirs = set()
proj_dirs_parents = set()
for project in self.GetProjects(None, missing_ok=True):
proj_dirs.add(project.relpath)
(head, _tail) = os.path.split(project.relpath)
while head != "":
proj_dirs_parents.add(head)
(head, _tail) = os.path.split(head)
proj_dirs.add('.repo')
class StatusColoring(Coloring):
def __init__(self, config):
Coloring.__init__(self, config, 'status')
self.project = self.printer('header', attr='bold')
self.untracked = self.printer('untracked', fg='red')
orig_path = os.getcwd()
try:
os.chdir(self.manifest.topdir)
outstring = []
self._FindOrphans(glob.glob('.*') +
glob.glob('*'),
proj_dirs, proj_dirs_parents, outstring)
if outstring:
output = StatusColoring(self.manifest.globalConfig)
output.project('Objects not within a project (orphans)')
output.nl()
for entry in outstring:
output.untracked(entry)
output.nl()
else:
print('No orphan files or directories')
finally:
# Restore CWD.
os.chdir(orig_path)