sync: superproject - support for switching hosts and switching branches.

+ superproject will be fetched into a directory with the name
  “<remote name>-superproject.git” instead of the current
  “superproject.git” folder.

+ Deleted the _Clone method and added the _Init method.

+ The _Init method will run “git init --bare <remote>-superproject.git”.
  It will create the folder and set up a bare repository in the
  <remote>-superproject.git folder.

+ The _Fetch method now passes the <remote url> and <branch> arguments
  to “git fetch”. Moved the --filter argument from “git clone” to
  “git fetch” and added the --no-tags argument. The _Fetch method
  executes one of the following commands to fetch the superproject:

  master:  git fetch <remote url> --force --no-tags --filter blob:none
  branch:  git fetch <remote url> --force --no-tags --filter blob:none \
           <branch>:<branch>

+ Performance improvements for aosp-master
  ++ repo init performance improved from 35 seconds to 17 seconds.
  ++ repo init --use-superproject is around 5 to 7 seconds slower.
  ++ repo sync --use-superproject is around 3 to 4 minutes faster.

Tested the code with the following commands.

$ ./run_tests -v

Tested the sync code by using repo_dev alias and pointing to this CL.

$ time repo_dev init -u sso://android.git.corp.google.com/platform/manifest -b master --partial-clone --clone-filter=blob:limit=10M --repo-rev=main --use-superproject
...
  real	0m20.648s
  user	0m8.046s
  sys	0m3.271s

+ Without superproject
$ time repo init -u sso://android.git.corp.google.com/platform/manifest -b master --partial-clone --clone-filter=blob:limit=10M --repo-rev=main
  real	0m13.078s
  user	0m9.783s
  sys	0m2.528s

$ time repo_dev sync -c -j32 --use-superproject
...
  real	15m7.072s
  user	110m7.216s
  sys	20m17.559s

+ Without superproject
$ time repo sync -c -j32
...
  real	19m25.644s
  user	91m56.331s
  sys	20m59.170s

Bug: [google internal] b/180492484
Bug: [google internal] b/179470886
Bug: [google internal] b/180124069
Bug: https://crbug.com/gerrit/13709
Bug: https://crbug.com/gerrit/13707

Change-Id: Ib04bd7f1e25ceb75532643e58ad0129300ba3299
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297702
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Raman Tenneti <rtenneti@google.com>
This commit is contained in:
Raman Tenneti 2021-02-22 16:54:56 -08:00
parent 45ad1541c5
commit ceba2ddc13
2 changed files with 48 additions and 53 deletions

View File

@ -22,13 +22,13 @@ Examples:
project_commit_ids = superproject.UpdateProjectsRevisionId(projects) project_commit_ids = superproject.UpdateProjectsRevisionId(projects)
""" """
import hashlib
import os import os
import sys import sys
from error import BUG_REPORT_URL from error import BUG_REPORT_URL
from git_command import GitCommand from git_command import GitCommand
from git_refs import R_HEADS from git_refs import R_HEADS
import platform_utils
_SUPERPROJECT_GIT_NAME = 'superproject.git' _SUPERPROJECT_GIT_NAME = 'superproject.git'
_SUPERPROJECT_MANIFEST_NAME = 'superproject_override.xml' _SUPERPROJECT_MANIFEST_NAME = 'superproject_override.xml'
@ -37,9 +37,9 @@ _SUPERPROJECT_MANIFEST_NAME = 'superproject_override.xml'
class Superproject(object): class Superproject(object):
"""Get commit ids from superproject. """Get commit ids from superproject.
It does a 'git clone' of superproject and 'git ls-tree' to get list of commit ids Initializes a local copy of a superproject for the manifest. This allows
for all projects. It contains project_commit_ids which is a dictionary with lookup of commit ids for all projects. It contains _project_commit_ids which
project/commit id entries. is a dictionary with project/commit id entries.
""" """
def __init__(self, manifest, repodir, superproject_dir='exp-superproject'): def __init__(self, manifest, repodir, superproject_dir='exp-superproject'):
"""Initializes superproject. """Initializes superproject.
@ -58,8 +58,12 @@ class Superproject(object):
self._superproject_path = os.path.join(self._repodir, superproject_dir) self._superproject_path = os.path.join(self._repodir, superproject_dir)
self._manifest_path = os.path.join(self._superproject_path, self._manifest_path = os.path.join(self._superproject_path,
_SUPERPROJECT_MANIFEST_NAME) _SUPERPROJECT_MANIFEST_NAME)
self._work_git = os.path.join(self._superproject_path, git_name = ''
_SUPERPROJECT_GIT_NAME) if self._manifest.superproject:
remote_name = self._manifest.superproject['remote'].name
git_name = hashlib.md5(remote_name.encode('utf8')).hexdigest() + '-'
self._work_git_name = git_name + _SUPERPROJECT_GIT_NAME
self._work_git = os.path.join(self._superproject_path, self._work_git_name)
@property @property
def project_commit_ids(self): def project_commit_ids(self):
@ -77,20 +81,15 @@ class Superproject(object):
branch = branch[len(R_HEADS):] branch = branch[len(R_HEADS):]
return branch return branch
def _Clone(self, url): def _Init(self):
"""Do a 'git clone' for the given url. """Sets up a local Git repository to get a copy of a superproject.
Args:
url: superproject's url to be passed to git clone.
Returns: Returns:
True if git clone is successful, or False. True if initialization is successful, or False.
""" """
if not os.path.exists(self._superproject_path): if not os.path.exists(self._superproject_path):
os.mkdir(self._superproject_path) os.mkdir(self._superproject_path)
cmd = ['clone', url, '--filter', 'blob:none', '--bare'] cmd = ['init', '--bare', self._work_git_name]
if self._branch:
cmd += ['--branch', self._branch]
p = GitCommand(None, p = GitCommand(None,
cmd, cmd,
cwd=self._superproject_path, cwd=self._superproject_path,
@ -98,24 +97,27 @@ class Superproject(object):
capture_stderr=True) capture_stderr=True)
retval = p.Wait() retval = p.Wait()
if retval: if retval:
# `git clone` is documented to produce an exit status of `128` if print('repo: error: git init call failed with return code: %r, stderr: %r' %
# the requested url or branch are not present in the configuration.
print('repo: error: git clone call failed with return code: %r, stderr: %r' %
(retval, p.stderr), file=sys.stderr) (retval, p.stderr), file=sys.stderr)
return False return False
return True return True
def _Fetch(self): def _Fetch(self, url):
"""Do a 'git fetch' to to fetch the latest content. """Fetches a local copy of a superproject for the manifest based on url.
Args:
url: superproject's url.
Returns: Returns:
True if 'git fetch' is successful, or False. True if fetch is successful, or False.
""" """
if not os.path.exists(self._work_git): if not os.path.exists(self._work_git):
print('git fetch missing drectory: %s' % self._work_git, print('git fetch missing drectory: %s' % self._work_git,
file=sys.stderr) file=sys.stderr)
return False return False
cmd = ['fetch', 'origin', '+refs/heads/*:refs/heads/*', '--prune'] cmd = ['fetch', url, '--force', '--no-tags', '--filter', 'blob:none']
if self._branch:
cmd += [self._branch + ':' + self._branch]
p = GitCommand(None, p = GitCommand(None,
cmd, cmd,
cwd=self._work_git, cwd=self._work_git,
@ -129,7 +131,7 @@ class Superproject(object):
return True return True
def _LsTree(self): def _LsTree(self):
"""Returns the data from 'git ls-tree ...'. """Gets the commit ids for all projects.
Works only in git repositories. Works only in git repositories.
@ -153,14 +155,12 @@ class Superproject(object):
if retval == 0: if retval == 0:
data = p.stdout data = p.stdout
else: else:
# `git clone` is documented to produce an exit status of `128` if
# the requested url or branch are not present in the configuration.
print('repo: error: git ls-tree call failed with return code: %r, stderr: %r' % ( print('repo: error: git ls-tree call failed with return code: %r, stderr: %r' % (
retval, p.stderr), file=sys.stderr) retval, p.stderr), file=sys.stderr)
return data return data
def Sync(self): def Sync(self):
"""Sync superproject either by git clone/fetch. """Gets a local copy of a superproject for the manifest.
Returns: Returns:
True if sync of superproject is successful, or False. True if sync of superproject is successful, or False.
@ -179,16 +179,9 @@ class Superproject(object):
file=sys.stderr) file=sys.stderr)
return False return False
do_clone = True if not self._Init():
if os.path.exists(self._superproject_path): return False
if not self._Fetch(): if not self._Fetch(url):
# If fetch fails due to a corrupted git directory, then do a git clone.
platform_utils.rmtree(self._superproject_path)
else:
do_clone = False
if do_clone:
if not self._Clone(url):
print('error: git clone failed for url: %s' % url, file=sys.stderr)
return False return False
return True return True
@ -203,7 +196,8 @@ class Superproject(object):
data = self._LsTree() data = self._LsTree()
if not data: if not data:
print('error: git ls-tree failed for superproject', file=sys.stderr) print('error: git ls-tree failed to return data for superproject',
file=sys.stderr)
return None return None
# Parse lines like the following to select lines starting with '160000' and # Parse lines like the following to select lines starting with '160000' and

View File

@ -97,17 +97,17 @@ class SuperprojectTestCase(unittest.TestCase):
with mock.patch.object(self._superproject, '_GetBranch', return_value='junk'): with mock.patch.object(self._superproject, '_GetBranch', return_value='junk'):
self.assertFalse(superproject.Sync()) self.assertFalse(superproject.Sync())
def test_superproject_get_superproject_mock_clone(self): def test_superproject_get_superproject_mock_init(self):
"""Test with _Clone failing.""" """Test with _Init failing."""
with mock.patch.object(self._superproject, '_Clone', return_value=False): with mock.patch.object(self._superproject, '_Init', return_value=False):
self.assertFalse(self._superproject.Sync()) self.assertFalse(self._superproject.Sync())
def test_superproject_get_superproject_mock_fetch(self): def test_superproject_get_superproject_mock_fetch(self):
"""Test with _Fetch failing and _clone being called.""" """Test with _Fetch failing."""
with mock.patch.object(self._superproject, '_Clone', return_value=True): with mock.patch.object(self._superproject, '_Init', return_value=True):
os.mkdir(self._superproject._superproject_path) os.mkdir(self._superproject._superproject_path)
with mock.patch.object(self._superproject, '_Fetch', return_value=False): with mock.patch.object(self._superproject, '_Fetch', return_value=False):
self.assertTrue(self._superproject.Sync()) self.assertFalse(self._superproject.Sync())
def test_superproject_get_all_project_commit_ids_mock_ls_tree(self): def test_superproject_get_all_project_commit_ids_mock_ls_tree(self):
"""Test with LsTree being a mock.""" """Test with LsTree being a mock."""
@ -116,7 +116,8 @@ class SuperprojectTestCase(unittest.TestCase):
'160000 commit e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06\tbootable/recovery\x00' '160000 commit e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06\tbootable/recovery\x00'
'120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00' '120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00'
'160000 commit ade9b7a0d874e25fff4bf2552488825c6f111928\tbuild/bazel\x00') '160000 commit ade9b7a0d874e25fff4bf2552488825c6f111928\tbuild/bazel\x00')
with mock.patch.object(self._superproject, '_Clone', return_value=True): with mock.patch.object(self._superproject, '_Init', return_value=True):
with mock.patch.object(self._superproject, '_Fetch', return_value=True):
with mock.patch.object(self._superproject, '_LsTree', return_value=data): with mock.patch.object(self._superproject, '_LsTree', return_value=data):
commit_ids = self._superproject._GetAllProjectsCommitIds() commit_ids = self._superproject._GetAllProjectsCommitIds()
self.assertEqual(commit_ids, { self.assertEqual(commit_ids, {
@ -151,7 +152,7 @@ class SuperprojectTestCase(unittest.TestCase):
projects = self._superproject._manifest.projects projects = self._superproject._manifest.projects
data = ('160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00' data = ('160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00'
'160000 commit e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06\tbootable/recovery\x00') '160000 commit e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06\tbootable/recovery\x00')
with mock.patch.object(self._superproject, '_Clone', return_value=True): with mock.patch.object(self._superproject, '_Init', return_value=True):
with mock.patch.object(self._superproject, '_Fetch', return_value=True): with mock.patch.object(self._superproject, '_Fetch', return_value=True):
with mock.patch.object(self._superproject, with mock.patch.object(self._superproject,
'_LsTree', '_LsTree',