git-repo/command.py
Raman Tenneti 784e16f3aa superproject: Don't exit if superproject tag doesn't exist in manifest.
Don't exit if there are missing commit ids in superproject.

This change implements the following suggestion from delphij@:

"we should note the event (so we know that --use-superproject but there
 were some errors, e.g. manifest didn't specify commit id for some
 reason, or if there is no superproject but --use-superproject is
 used), print out a message telling the user that this is not supported,
 but continue as if --no-use-superproject was specified?"

Changes:

superproject:
+ Added git_trace2_event_log as an argument to the constructor.
+ Sync method returns SyncResult, a NamedTuple of (see the sketch after
  this list):
  ++ success - True if the sync of superproject succeeded, otherwise False
  ++ fatal - True if the caller should exit, otherwise False
+ UpdateProjectsRevisionId returns UpdateProjectsResult, a NamedTuple of:
  ++ manifest_path - path of the overriding manifest file on success,
     otherwise None
  ++ fatal - True if the caller should exit, otherwise False
+ _GetAllProjectsCommitIds returns CommitIdsResult, a NamedTuple of:
  ++ commit_ids - a dictionary of projects/commit ids on success,
     otherwise None
  ++ fatal - True if the caller should exit, otherwise False
+ Added _SkipUpdatingProjectRevisionId, a helper function that decides
  whether a project's revision id needs to be updated. It is used to
  exclude projects that come from a local manifest file.
+ Added the following error events to git_trace2_event_log:
  ++ superproject is missing in a manifest
  ++ there are missing commit ids for projects
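
A minimal sketch of these return types, based only on the field
descriptions above (the actual definitions belong to the superproject
change, not to command.py):

  from typing import Dict, NamedTuple, Optional

  class SyncResult(NamedTuple):
    success: bool  # True if the superproject sync succeeded.
    fatal: bool    # True if the caller should exit.

  class UpdateProjectsResult(NamedTuple):
    manifest_path: Optional[str]  # Overriding manifest file, or None.
    fatal: bool                   # True if the caller should exit.

  class CommitIdsResult(NamedTuple):
    commit_ids: Optional[Dict[str, str]]  # project -> commit id, or None.
    fatal: bool                           # True if the caller should exit.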

command.py:
+ Deleted unused import - platform
+ Added git_trace2_event_log as a member so all subcommands can log error
  events (see the sketch below).
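
A minimal sketch of how a subcommand could record such an error and keep
going (the ErrorEvent method name is an assumption about the event-log
API, not something defined in this file):

  # Illustrative only, inside a subcommand's Execute().
  if self.git_event_log:
    self.git_event_log.ErrorEvent(
        'superproject is missing in manifest; ignoring --use-superproject')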

main.py:
+ Initialized git_trace2_event_log as a member of the command object.

init.py:
+ Deleted unused import - optparse
+ Called sys.exit only if Sync returns fatal=True

sync.py:
+ Called sys.exit only if Superproject's UpdateProjectsRevisionId returns
  fatal=True
+ Reloaded the manifest only if a manifest path is returned by
  UpdateProjectsRevisionId; otherwise, fall back to the old way of doing
  repo sync (sketched below).
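
A minimal sketch of that handling; the helper name is made up, and
reload_manifest stands in for whatever sync.py uses to re-parse the
manifest:

  import sys

  def _handle_superproject_result(update_result, reload_manifest):
    """Illustrative only: exit on fatal, otherwise prefer the override."""
    if update_result.fatal:
      sys.exit(1)
    if update_result.manifest_path:
      # Re-parse projects from the overriding manifest written out by the
      # superproject code.
      reload_manifest(update_result.manifest_path)
    else:
      # Fall back to the old way of doing repo sync.
      print('warning: continuing without superproject commit ids',
            file=sys.stderr)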

test_git_superproject:
+ Added code to verify error events are being logged.
+ Added a test for no superproject tag
+ Added test for UpdateProjectsRevisionId not updating the revision id
  with the commit ids.

Tested the code with the following commands.

+ Positive test case with aosp-master.
  $ repo_dev init -u persistent-https://android.git.corp.google.com/platform/manifest -b master --use-superproject
  NOTICE: --use-superproject is in beta; report any issues to the address described in `repo version`
  .../android/aosp/.repo/exp-superproject/925043f706ba64db713e9bf3b55987e2-superproject.git: Initial setup for superproject completed.

  Your identity is: Raman Tenneti <rtenneti@google.com>
  If you want to change this, please re-run 'repo init' with --config-name

  repo has been initialized in .../android/aosp

  $ repo_dev sync -j40 --use-superproject
  remote: Total 12 (delta 4), reused 12 (delta 4)
  NOTICE: --use-superproject is in beta; report any issues to the address described in `repo version`
  .../android/aosp/.repo/exp-superproject/925043f706ba64db713e9bf3b55987e2-superproject.git: Initial setup for superproject completed.
  ...
  repo sync has finished successfully.

+ Negative test case without superproject tag.
  $ repo_dev sync -j40 --use-superproject
  NOTICE: --use-superproject is in beta; report any issues to the address described in `repo version`
  repo error: superproject tag is not defined in manifest: .../android/aosp/.repo/manifest.xml
  error: Cannot get project commit ids from manifest
  error: Update of revsionId from superproject has failed. Please resync with --no-use-superproject option
  ...
  Checking out: 100% (1022/1022), done in 3.589s
  repo sync has finished successfully.

+ Test for missing commit_id for a project.
  $ repo_dev sync -j40 --use-superproject
  NOTICE: --use-superproject is in beta; report any issues to the address described in `repo version`
  .../android/aosp/.repo/exp-superproject/925043f706ba64db713e9bf3b55987e2-superproject.git: Initial setup for superproject completed.
  error: please file a bug using go/repo-bug to report missing commit_ids for: ['build/blueprint']
  error: Update of revsionId from superproject has failed. Please resync with --no-use-superproject option
  ...
  Checking out: 100% (1022/1022), done in 3.364s
  repo sync has finished successfully.

$ ./run_tests -v
  ...
  ...== 164 passed in 2.87s ==...

Bug: [google internal] b/189371541
Change-Id: I5ea49f87e8fa41be590fc0c914573e16c8cdfcfa
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/309162
Tested-by: Raman Tenneti <rtenneti@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
2021-06-16 04:48:35 +00:00

# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import multiprocessing
import os
import optparse
import re
import sys

from event_log import EventLog
from error import NoSuchProjectError
from error import InvalidProjectGroupsError
import progress


# Number of projects to submit to a single worker process at a time.
# This number represents a tradeoff between the overhead of IPC and finer
# grained opportunity for parallelism. This particular value was chosen by
# iterating through powers of two until the overall performance no longer
# improved. The performance of this batch size is not a function of the
# number of cores on the system.
WORKER_BATCH_SIZE = 32

# How many jobs to run in parallel by default? This assumes the jobs are
# largely I/O bound and do not hit the network.
DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8)


class Command(object):
  """Base class for any command line action in repo.
  """

  # Singleton for all commands to track overall repo command execution and
  # provide event summary to callers. Only used by sync subcommand currently.
  #
  # NB: This is being replaced by git trace2 events. See git_trace2_event_log.
  event_log = EventLog()

  # Whether this command is a "common" one, i.e. whether the user would commonly
  # use it or it's a more uncommon command. This is used by the help command to
  # show short-vs-full summaries.
  COMMON = False

  # Whether this command supports running in parallel. If greater than 0,
  # it is the number of parallel jobs to default to.
  PARALLEL_JOBS = None

  def __init__(self, repodir=None, client=None, manifest=None, gitc_manifest=None,
               git_event_log=None):
    self.repodir = repodir
    self.client = client
    self.manifest = manifest
    self.gitc_manifest = gitc_manifest
    self.git_event_log = git_event_log

    # Cache for the OptionParser property.
    self._optparse = None

  def WantPager(self, _opt):
    return False

  def ReadEnvironmentOptions(self, opts):
    """ Set options from environment variables. """

    env_options = self._RegisteredEnvironmentOptions()

    for env_key, opt_key in env_options.items():
      # Get the user-set option value if any
      opt_value = getattr(opts, opt_key)

      # If the value is set, it means the user has passed it as a command
      # line option, and we should use that. Otherwise we can try to set it
      # with the value from the corresponding environment variable.
      if opt_value is not None:
        continue

      env_value = os.environ.get(env_key)
      if env_value is not None:
        setattr(opts, opt_key, env_value)

    return opts

  @property
  def OptionParser(self):
    if self._optparse is None:
      try:
        me = 'repo %s' % self.NAME
        usage = self.helpUsage.strip().replace('%prog', me)
      except AttributeError:
        usage = 'repo %s' % self.NAME
      epilog = 'Run `repo help %s` to view the detailed manual.' % self.NAME
      self._optparse = optparse.OptionParser(usage=usage, epilog=epilog)
      self._CommonOptions(self._optparse)
      self._Options(self._optparse)
    return self._optparse

  def _CommonOptions(self, p, opt_v=True):
    """Initialize the option parser with common options.

    These will show up for *all* subcommands, so use sparingly.
    NB: Keep in sync with repo:InitParser().
    """
    g = p.add_option_group('Logging options')
    opts = ['-v'] if opt_v else []
    g.add_option(*opts, '--verbose',
                 dest='output_mode', action='store_true',
                 help='show all output')
    g.add_option('-q', '--quiet',
                 dest='output_mode', action='store_false',
                 help='only show errors')

    if self.PARALLEL_JOBS is not None:
      p.add_option(
          '-j', '--jobs',
          type=int, default=self.PARALLEL_JOBS,
          help='number of jobs to run in parallel (default: %s)' % self.PARALLEL_JOBS)

  def _Options(self, p):
    """Initialize the option parser with subcommand-specific options."""

  def _RegisteredEnvironmentOptions(self):
    """Get options that can be set from environment variables.

    Return a dictionary mapping environment variable name
    to option key name that it can override.

    Example: {'REPO_MY_OPTION': 'my_option'}

    Will allow the option with key value 'my_option' to be set
    from the value in the environment variable named 'REPO_MY_OPTION'.

    Note: This does not work properly for options that are explicitly
    set to None by the user, or options that are defined with a
    default value other than None.
    """
    return {}

  def Usage(self):
    """Display usage and terminate.
    """
    self.OptionParser.print_usage()
    sys.exit(1)

  def CommonValidateOptions(self, opt, args):
    """Validate common options."""
    opt.quiet = opt.output_mode is False
    opt.verbose = opt.output_mode is True

  def ValidateOptions(self, opt, args):
    """Validate the user options & arguments before executing.

    This is meant to help break the code up into logical steps. Some tips:
    * Use self.OptionParser.error to display CLI related errors.
    * Adjust opt member defaults as makes sense.
    * Adjust the args list, but do so inplace so the caller sees updates.
    * Try to avoid updating self state. Leave that to Execute.
    """

  def Execute(self, opt, args):
    """Perform the action, after option parsing is complete.
    """
    raise NotImplementedError

  @staticmethod
  def ExecuteInParallel(jobs, func, inputs, callback, output=None, ordered=False):
    """Helper for managing parallel execution boilerplate.

    For subcommands that can easily split their work up.

    Args:
      jobs: How many parallel processes to use.
      func: The function to apply to each of the |inputs|. Usually a
        functools.partial for wrapping additional arguments. It will be run
        in a separate process, so it must be picklable, so nested functions
        won't work. Methods on the subcommand Command class should work.
      inputs: The list of items to process. Must be a list.
      callback: The function to pass the results to for processing. It will be
        executed in the main thread and process the results of |func| as they
        become available. Thus it may be a local nested function. Its return
        value is passed back directly. It takes three arguments:
        - The processing pool (or None with one job).
        - The |output| argument.
        - An iterator for the results.
      output: An output manager. May be progress.Progress or color.Coloring.
      ordered: Whether the jobs should be processed in order.

    Returns:
      The |callback| function's results are returned.
    """
    try:
      # NB: Multiprocessing is heavy, so don't spin it up for one job.
      if len(inputs) == 1 or jobs == 1:
        return callback(None, output, (func(x) for x in inputs))
      else:
        with multiprocessing.Pool(jobs) as pool:
          submit = pool.imap if ordered else pool.imap_unordered
          return callback(pool, output, submit(func, inputs, chunksize=WORKER_BATCH_SIZE))
    finally:
      if isinstance(output, progress.Progress):
        output.end()

  def _ResetPathToProjectMap(self, projects):
    self._by_path = dict((p.worktree, p) for p in projects)

  def _UpdatePathToProjectMap(self, project):
    self._by_path[project.worktree] = project

  def _GetProjectByPath(self, manifest, path):
    project = None
    if os.path.exists(path):
      oldpath = None
      while (path and
             path != oldpath and
             path != manifest.topdir):
        try:
          project = self._by_path[path]
          break
        except KeyError:
          oldpath = path
          path = os.path.dirname(path)
      if not project and path == manifest.topdir:
        try:
          project = self._by_path[path]
        except KeyError:
          pass
    else:
      try:
        project = self._by_path[path]
      except KeyError:
        pass
    return project

  def GetProjects(self, args, manifest=None, groups='', missing_ok=False,
                  submodules_ok=False):
    """A list of projects that match the arguments.
    """
    if not manifest:
      manifest = self.manifest
    all_projects_list = manifest.projects
    result = []

    mp = manifest.manifestProject

    if not groups:
      groups = manifest.GetGroupsStr()
    groups = [x for x in re.split(r'[,\s]+', groups) if x]

    if not args:
      derived_projects = {}
      for project in all_projects_list:
        if submodules_ok or project.sync_s:
          derived_projects.update((p.name, p)
                                  for p in project.GetDerivedSubprojects())
      all_projects_list.extend(derived_projects.values())
      for project in all_projects_list:
        if (missing_ok or project.Exists) and project.MatchesGroups(groups):
          result.append(project)
    else:
      self._ResetPathToProjectMap(all_projects_list)

      for arg in args:
        # We have to filter by manifest groups in case the requested project is
        # checked out multiple times or differently based on them.
        projects = [project for project in manifest.GetProjectsWithName(arg)
                    if project.MatchesGroups(groups)]

        if not projects:
          path = os.path.abspath(arg).replace('\\', '/')
          project = self._GetProjectByPath(manifest, path)

          # If it's not a derived project, update path->project mapping and
          # search again, as arg might actually point to a derived subproject.
          if (project and not project.Derived and (submodules_ok or
                                                   project.sync_s)):
            search_again = False
            for subproject in project.GetDerivedSubprojects():
              self._UpdatePathToProjectMap(subproject)
              search_again = True
            if search_again:
              project = self._GetProjectByPath(manifest, path) or project

          if project:
            projects = [project]

        if not projects:
          raise NoSuchProjectError(arg)

        for project in projects:
          if not missing_ok and not project.Exists:
            raise NoSuchProjectError('%s (%s)' % (arg, project.relpath))
          if not project.MatchesGroups(groups):
            raise InvalidProjectGroupsError(arg)

        result.extend(projects)

    def _getpath(x):
      return x.relpath
    result.sort(key=_getpath)
    return result

  def FindProjects(self, args, inverse=False):
    result = []
    patterns = [re.compile(r'%s' % a, re.IGNORECASE) for a in args]
    for project in self.GetProjects(''):
      for pattern in patterns:
        match = pattern.search(project.name) or pattern.search(project.relpath)
        if not inverse and match:
          result.append(project)
          break
        if inverse and match:
          break
      else:
        if inverse:
          result.append(project)
    result.sort(key=lambda project: project.relpath)
    return result


class InteractiveCommand(Command):
  """Command which requires user interaction on the tty and
     must not run within a pager, even if the user asks to.
  """

  def WantPager(self, _opt):
    return False


class PagedCommand(Command):
  """Command which defaults to output in a pager, as its
     display tends to be larger than one screen full.
  """

  def WantPager(self, _opt):
    return True


class MirrorSafeCommand(object):
  """Command permits itself to run within a mirror,
     and does not require a working directory.
  """


class GitcAvailableCommand(object):
  """Command that requires GITC to be available, but does
     not require the local client to be a GITC client.
  """


class GitcClientCommand(object):
  """Command that requires the local client to be a GITC
     client.
  """