From 21dce3d8b351538d0fe8c05e6106c8b281580dda Mon Sep 17 00:00:00 2001 From: Raman Tenneti Date: Tue, 9 Feb 2021 00:26:31 -0800 Subject: [PATCH] init: added --use-superproject option to clone superproject. Added --no-use-superproject to repo and init.py to disable use of manifest superprojects. Replaced the term "sha" with "commit id". Added _GetBranch method to Superproject object. Moved the code shared between init and sync into the Superproject.Sync method. This method does either a git fetch or a git clone; if the git fetch fails, it falls back to git clone. Changed Superproject constructor to accept manifest and repodir (the branch is now derived internally via _GetBranch) to avoid passing them to multiple functions as arguments. Changed functions that were raising exceptions to return either True or False. Saved the --use-superproject option in config as repo.superproject. Updated internal-fs-layout.md document. Updated the tests to work with the new API changes in Superproject. Performance of the first-time sync has improved from 20 minutes to around 15 minutes. Tested the code with the following commands. $ ./run_tests -v Tested the sync code by using repo_dev alias and pointing to this CL. repo init took around 20 seconds longer because of cloning of superproject. $ time repo_dev init -u sso://android.git.corp.google.com/platform/manifest -b master --partial-clone --clone-filter=blob:limit=10M --repo-rev=main --use-superproject ... real 0m35.919s user 0m21.947s sys 0m8.977s First run $ time repo sync --use-superproject ... real 16m41.982s user 100m6.916s sys 19m18.753s No difference in repo sync time after the first run. 
Bug: [google internal] b/179090734 Bug: https://crbug.com/gerrit/13709 Bug: https://crbug.com/gerrit/13707 Change-Id: I12df92112f46e001dfbc6f12cd633c3a15cf924b Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/296382 Reviewed-by: Mike Frysinger Tested-by: Raman Tenneti --- docs/internal-fs-layout.md | 2 + git_superproject.py | 154 +++++++++++++++++++-------------- repo | 8 +- subcmds/init.py | 20 +++++ subcmds/sync.py | 22 +---- tests/test_git_superproject.py | 111 +++++++++++++----------- 6 files changed, 183 insertions(+), 134 deletions(-) diff --git a/docs/internal-fs-layout.md b/docs/internal-fs-layout.md index 4d18bb31..53c42638 100644 --- a/docs/internal-fs-layout.md +++ b/docs/internal-fs-layout.md @@ -142,11 +142,13 @@ User controlled settings are initialized when running `repo init`. | repo.partialclone | `--partial-clone` | Create [partial git clones] | | repo.reference | `--reference` | Reference repo client checkout | | repo.submodules | `--submodules` | Sync git submodules | +| repo.superproject | `--use-superproject` | Sync [superproject] | | repo.worktree | `--worktree` | Use `git worktree` for checkouts | | user.email | `--config-name` | User's e-mail address; Copied into `.git/config` when checking out a new project | | user.name | `--config-name` | User's name; Copied into `.git/config` when checking out a new project | [partial git clones]: https://git-scm.com/docs/gitrepository-layout#_code_partialclone_code +[superproject]: https://en.wikibooks.org/wiki/Git/Submodules_and_Superprojects ### Repo hooks settings diff --git a/git_superproject.py b/git_superproject.py index 378ede25..471dadc4 100644 --- a/git_superproject.py +++ b/git_superproject.py @@ -12,21 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Provide functionality to get all projects and their SHAs from Superproject. +"""Provide functionality to get all projects and their commit ids from Superproject. 
For more information on superproject, check out: https://en.wikibooks.org/wiki/Git/Submodules_and_Superprojects Examples: superproject = Superproject() - project_shas = superproject.GetAllProjectsSHAs() + project_commit_ids = superproject.UpdateProjectsRevisionId(projects) """ import os import sys -from error import BUG_REPORT_URL, GitError +from error import BUG_REPORT_URL from git_command import GitCommand +from git_refs import R_HEADS import platform_utils _SUPERPROJECT_GIT_NAME = 'superproject.git' @@ -34,19 +35,24 @@ _SUPERPROJECT_MANIFEST_NAME = 'superproject_override.xml' class Superproject(object): - """Get SHAs from superproject. + """Get commit ids from superproject. - It does a 'git clone' of superproject and 'git ls-tree' to get list of SHAs for all projects. - It contains project_shas which is a dictionary with project/sha entries. + It does a 'git clone' of superproject and 'git ls-tree' to get list of commit ids + for all projects. It contains project_commit_ids which is a dictionary with + project/commit id entries. """ - def __init__(self, repodir, superproject_dir='exp-superproject'): + def __init__(self, manifest, repodir, superproject_dir='exp-superproject'): """Initializes superproject. Args: + manifest: A Manifest object that is to be written to a file. repodir: Path to the .repo/ dir for holding all internal checkout state. + It must be in the top directory of the repo client checkout. superproject_dir: Relative path under |repodir| to checkout superproject. 
""" - self._project_shas = None + self._project_commit_ids = None + self._manifest = manifest + self._branch = self._GetBranch() self._repodir = os.path.abspath(repodir) self._superproject_dir = superproject_dir self._superproject_path = os.path.join(self._repodir, superproject_dir) @@ -56,25 +62,35 @@ class Superproject(object): _SUPERPROJECT_GIT_NAME) @property - def project_shas(self): - """Returns a dictionary of projects and their SHAs.""" - return self._project_shas + def project_commit_ids(self): + """Returns a dictionary of projects and their commit ids.""" + return self._project_commit_ids - def _Clone(self, url, branch=None): - """Do a 'git clone' for the given url and branch. + def _GetBranch(self): + """Returns the branch name for getting the approved manifest.""" + p = self._manifest.manifestProject + b = p.GetBranch(p.CurrentBranch) + if not b: + return None + branch = b.merge + if branch and branch.startswith(R_HEADS): + branch = branch[len(R_HEADS):] + return branch + + def _Clone(self, url): + """Do a 'git clone' for the given url. Args: url: superproject's url to be passed to git clone. - branch: The branchname to be passed as argument to git clone. Returns: - True if 'git clone ' is successful, or False. + True if git clone is successful, or False. """ if not os.path.exists(self._superproject_path): os.mkdir(self._superproject_path) cmd = ['clone', url, '--filter', 'blob:none', '--bare'] - if branch: - cmd += ['--branch', branch] + if self._branch: + cmd += ['--branch', self._branch] p = GitCommand(None, cmd, cwd=self._superproject_path, @@ -112,22 +128,20 @@ class Superproject(object): return False return True - def _LsTree(self, branch='HEAD'): - """Returns the data from 'git ls-tree -r '. + def _LsTree(self): + """Returns the data from 'git ls-tree ...'. Works only in git repositories. - Args: - branch: The branchname to be passed as argument to git ls-tree. - Returns: - data: data returned from 'git ls-tree -r HEAD' instead of None. 
+ data: data returned from 'git ls-tree ...' instead of None. """ if not os.path.exists(self._work_git): print('git ls-tree missing drectory: %s' % self._work_git, file=sys.stderr) return None data = None + branch = 'HEAD' if not self._branch else self._branch cmd = ['ls-tree', '-z', '-r', branch] p = GitCommand(None, @@ -145,18 +159,25 @@ class Superproject(object): retval, p.stderr), file=sys.stderr) return data - def _GetAllProjectsSHAs(self, url, branch=None): - """Get SHAs for all projects from superproject and save them in _project_shas. - - Args: - url: superproject's url to be passed to git clone or fetch. - branch: The branchname to be passed as argument to git clone or fetch. + def Sync(self): + """Sync superproject either by git clone/fetch. Returns: - A dictionary with the projects/SHAs instead of None. + True if sync of superproject is successful, or False. """ + print('WARNING: --use-superproject is experimental and not ' + 'for general use', file=sys.stderr) + + if not self._manifest.superproject: + print('error: superproject tag is not defined in manifest', + file=sys.stderr) + return False + + url = self._manifest.superproject['remote'].url if not url: - raise ValueError('url argument is not supplied.') + print('error: superproject URL is not defined in manifest', + file=sys.stderr) + return False do_clone = True if os.path.exists(self._superproject_path): @@ -166,35 +187,44 @@ class Superproject(object): else: do_clone = False if do_clone: - if not self._Clone(url, branch): - raise GitError('git clone failed for url: %s' % url) + if not self._Clone(url): + print('error: git clone failed for url: %s' % url, file=sys.stderr) + return False + return True - data = self._LsTree(branch) + def _GetAllProjectsCommitIds(self): + """Get commit ids for all projects from superproject and save them in _project_commit_ids. + + Returns: + A dictionary with the projects/commit ids on success, otherwise None. 
+ """ + if not self.Sync(): + return None + + data = self._LsTree() if not data: - raise GitError('git ls-tree failed for url: %s' % url) + print('error: git ls-tree failed for superproject', file=sys.stderr) + return None # Parse lines like the following to select lines starting with '160000' and - # build a dictionary with project path (last element) and its SHA (3rd element). + # build a dictionary with project path (last element) and its commit id (3rd element). # # 160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00 # 120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00 - shas = {} + commit_ids = {} for line in data.split('\x00'): ls_data = line.split(None, 3) if not ls_data: break if ls_data[0] == '160000': - shas[ls_data[3]] = ls_data[2] + commit_ids[ls_data[3]] = ls_data[2] - self._project_shas = shas - return shas + self._project_commit_ids = commit_ids + return commit_ids - def _WriteManfiestFile(self, manifest): + def _WriteManfiestFile(self): """Writes manifest to a file. - Args: - manifest: A Manifest object that is to be written to a file. - Returns: manifest_path: Path name of the file into which manifest is written instead of None. """ @@ -203,7 +233,7 @@ class Superproject(object): self._superproject_path, file=sys.stderr) return None - manifest_str = manifest.ToXml().toxml() + manifest_str = self._manifest.ToXml().toxml() manifest_path = self._manifest_path try: with open(manifest_path, 'w', encoding='utf-8') as fp: @@ -215,40 +245,34 @@ class Superproject(object): return None return manifest_path - def UpdateProjectsRevisionId(self, manifest, projects, url, branch=None): - """Update revisionId of every project in projects with the SHA. + def UpdateProjectsRevisionId(self, projects): + """Update revisionId of every project in projects with the commit id. Args: - manifest: A Manifest object that is to be written to a file. projects: List of projects whose revisionId needs to be updated. 
- url: superproject's url to be passed to git clone or fetch. - branch: The branchname to be passed as argument to git clone or fetch. Returns: manifest_path: Path name of the overriding manfiest file instead of None. """ - try: - shas = self._GetAllProjectsSHAs(url=url, branch=branch) - except Exception as e: - print('error: Cannot get project SHAs for %s: %s: %s' % - (url, type(e).__name__, str(e)), - file=sys.stderr) + commit_ids = self._GetAllProjectsCommitIds() + if not commit_ids: + print('error: Cannot get project commit ids from manifest', file=sys.stderr) return None - projects_missing_shas = [] + projects_missing_commit_ids = [] for project in projects: path = project.relpath if not path: continue - sha = shas.get(path) - if sha: - project.SetRevisionId(sha) + commit_id = commit_ids.get(path) + if commit_id: + project.SetRevisionId(commit_id) else: - projects_missing_shas.append(path) - if projects_missing_shas: - print('error: please file a bug using %s to report missing shas for: %s' % - (BUG_REPORT_URL, projects_missing_shas), file=sys.stderr) + projects_missing_commit_ids.append(path) + if projects_missing_commit_ids: + print('error: please file a bug using %s to report missing commit_ids for: %s' % + (BUG_REPORT_URL, projects_missing_commit_ids), file=sys.stderr) return None - manifest_path = self._WriteManfiestFile(manifest) + manifest_path = self._WriteManfiestFile() return manifest_path diff --git a/repo b/repo index 83933d7c..63cee031 100755 --- a/repo +++ b/repo @@ -324,6 +324,11 @@ def GetParser(gitc_init=False): 'each project. 
See git archive.') group.add_option('--submodules', action='store_true', help='sync any submodules associated with the manifest repo') + group.add_option('--use-superproject', action='store_true', default=None, + help='use the manifest superproject to sync projects') + group.add_option('--no-use-superproject', action='store_false', + dest='use_superproject', + help='disable use of manifest superprojects') group.add_option('-g', '--groups', default='default', help='restrict manifest projects to ones with specified ' 'group(s) [default|all|G1,G2,G3|G4,-G5,-G6]', @@ -333,7 +338,8 @@ def GetParser(gitc_init=False): 'platform group [auto|all|none|linux|darwin|...]', metavar='PLATFORM') group.add_option('--clone-bundle', action='store_true', - help='enable use of /clone.bundle on HTTP/HTTPS (default if not --partial-clone)') + help='enable use of /clone.bundle on HTTP/HTTPS ' + '(default if not --partial-clone)') group.add_option('--no-clone-bundle', dest='clone_bundle', action='store_false', help='disable use of /clone.bundle on HTTP/HTTPS (default if --partial-clone)') diff --git a/subcmds/init.py b/subcmds/init.py index fe3ebd2c..1d16c856 100644 --- a/subcmds/init.py +++ b/subcmds/init.py @@ -25,6 +25,7 @@ from error import ManifestParseError from project import SyncBuffer from git_config import GitConfig from git_command import git_require, MIN_GIT_VERSION_SOFT, MIN_GIT_VERSION_HARD +import git_superproject import platform_utils from wrapper import Wrapper @@ -134,6 +135,11 @@ to update the working directory files. 
g.add_option('--submodules', dest='submodules', action='store_true', help='sync any submodules associated with the manifest repo') + g.add_option('--use-superproject', action='store_true', + help='use the manifest superproject to sync projects') + g.add_option('--no-use-superproject', action='store_false', + dest='use_superproject', + help='disable use of manifest superprojects') g.add_option('-g', '--groups', dest='groups', default='default', help='restrict manifest projects to ones with specified ' @@ -176,6 +182,14 @@ to update the working directory files. return {'REPO_MANIFEST_URL': 'manifest_url', 'REPO_MIRROR_LOCATION': 'reference'} + def _CloneSuperproject(self): + """Clone the superproject based on the superproject's url and branch.""" + superproject = git_superproject.Superproject(self.manifest, + self.repodir) + if not superproject.Sync(): + print('error: git update of superproject failed', file=sys.stderr) + sys.exit(1) + def _SyncManifest(self, opt): m = self.manifest.manifestProject is_new = not m.Exists @@ -305,6 +319,9 @@ to update the working directory files. if opt.submodules: m.config.SetBoolean('repo.submodules', opt.submodules) + if opt.use_superproject is not None: + m.config.SetBoolean('repo.superproject', opt.use_superproject) + if not m.Sync_NetworkHalf(is_new=is_new, quiet=opt.quiet, verbose=opt.verbose, clone_bundle=opt.clone_bundle, current_branch_only=opt.current_branch_only, @@ -519,6 +536,9 @@ to update the working directory files. self._SyncManifest(opt) self._LinkManifest(opt.manifest_name) + if self.manifest.manifestProject.config.GetBoolean('repo.superproject'): + self._CloneSuperproject() + if os.isatty(0) and os.isatty(1) and not self.manifest.IsMirror: if opt.config_name or self._ShouldConfigureUser(opt): self._ConfigureUser(opt) diff --git a/subcmds/sync.py b/subcmds/sync.py index 5020ea7a..eda95f96 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py @@ -294,28 +294,12 @@ later is required to fix a server side protocol bug. 
Returns: Returns path to the overriding manifest file. """ - if not self.manifest.superproject: - print('error: superproject tag is not defined in manifest.xml', - file=sys.stderr) - sys.exit(1) - print('WARNING: --use-superproject is experimental and not ' - 'for general use', file=sys.stderr) - - superproject_url = self.manifest.superproject['remote'].url - if not superproject_url: - print('error: superproject URL is not defined in manifest.xml', - file=sys.stderr) - sys.exit(1) - - superproject = git_superproject.Superproject(self.manifest.repodir) + superproject = git_superproject.Superproject(self.manifest, + self.repodir) all_projects = self.GetProjects(args, missing_ok=True, submodules_ok=opt.fetch_submodules) - branch = self._GetBranch() - manifest_path = superproject.UpdateProjectsRevisionId(self.manifest, - all_projects, - url=superproject_url, - branch=branch) + manifest_path = superproject.UpdateProjectsRevisionId(all_projects) if not manifest_path: print('error: Update of revsionId from superproject has failed', file=sys.stderr) diff --git a/tests/test_git_superproject.py b/tests/test_git_superproject.py index fc9101dd..d2ee9f4f 100644 --- a/tests/test_git_superproject.py +++ b/tests/test_git_superproject.py @@ -19,7 +19,6 @@ import tempfile import unittest from unittest import mock -from error import GitError import git_superproject import manifest_xml import platform_utils @@ -32,7 +31,6 @@ class SuperprojectTestCase(unittest.TestCase): """Set up superproject every time.""" self.tempdir = tempfile.mkdtemp(prefix='repo_tests') self.repodir = os.path.join(self.tempdir, '.repo') - self._superproject = git_superproject.Superproject(self.repodir) self.manifest_file = os.path.join( self.repodir, manifest_xml.MANIFEST_FILE_NAME) os.mkdir(self.repodir) @@ -45,6 +43,16 @@ class SuperprojectTestCase(unittest.TestCase): url = https://localhost:0/manifest """) + manifest = self.getXmlManifest(""" + + + + + + +""") + self._superproject = 
git_superproject.Superproject(manifest, self.repodir) + def tearDown(self): """Tear down superproject every time.""" platform_utils.rmtree(self.tempdir) @@ -55,37 +63,53 @@ class SuperprojectTestCase(unittest.TestCase): fp.write(data) return manifest_xml.XmlManifest(self.repodir, self.manifest_file) - def test_superproject_get_project_shas_no_url(self): + def test_superproject_get_superproject_no_superproject(self): """Test with no url.""" - with self.assertRaises(ValueError): - self._superproject._GetAllProjectsSHAs(url=None) + manifest = self.getXmlManifest(""" + + +""") + superproject = git_superproject.Superproject(manifest, self.repodir) + self.assertFalse(superproject.Sync()) - def test_superproject_get_project_shas_invalid_url(self): + def test_superproject_get_superproject_invalid_url(self): """Test with an invalid url.""" - with self.assertRaises(GitError): - self._superproject._GetAllProjectsSHAs(url='localhost') + manifest = self.getXmlManifest(""" + + + + + +""") + superproject = git_superproject.Superproject(manifest, self.repodir) + self.assertFalse(superproject.Sync()) - def test_superproject_get_project_shas_invalid_branch(self): + def test_superproject_get_superproject_invalid_branch(self): """Test with an invalid branch.""" - with self.assertRaises(GitError): - self._superproject._GetAllProjectsSHAs( - url='sso://android/platform/superproject', - branch='junk') + manifest = self.getXmlManifest(""" + + + + + +""") + superproject = git_superproject.Superproject(manifest, self.repodir) + with mock.patch.object(self._superproject, '_GetBranch', return_value='junk'): + self.assertFalse(superproject.Sync()) - def test_superproject_get_project_shas_mock_clone(self): + def test_superproject_get_superproject_mock_clone(self): """Test with _Clone failing.""" - with self.assertRaises(GitError): - with mock.patch.object(self._superproject, '_Clone', return_value=False): - self._superproject._GetAllProjectsSHAs(url='localhost') + with 
mock.patch.object(self._superproject, '_Clone', return_value=False): + self.assertFalse(self._superproject.Sync()) - def test_superproject_get_project_shas_mock_fetch(self): - """Test with _Fetch failing.""" - with self.assertRaises(GitError): - with mock.patch.object(self._superproject, '_Clone', return_value=True): - with mock.patch.object(self._superproject, '_Fetch', return_value=False): - self._superproject._GetAllProjectsSHAs(url='localhost') + def test_superproject_get_superproject_mock_fetch(self): + """Test with _Fetch failing and _clone being called.""" + with mock.patch.object(self._superproject, '_Clone', return_value=True): + os.mkdir(self._superproject._superproject_path) + with mock.patch.object(self._superproject, '_Fetch', return_value=False): + self.assertTrue(self._superproject.Sync()) - def test_superproject_get_project_shas_mock_ls_tree(self): + def test_superproject_get_all_project_commit_ids_mock_ls_tree(self): """Test with LsTree being a mock.""" data = ('120000 blob 158258bdf146f159218e2b90f8b699c4d85b5804\tAndroid.bp\x00' '160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00' @@ -94,8 +118,8 @@ class SuperprojectTestCase(unittest.TestCase): '160000 commit ade9b7a0d874e25fff4bf2552488825c6f111928\tbuild/bazel\x00') with mock.patch.object(self._superproject, '_Clone', return_value=True): with mock.patch.object(self._superproject, '_LsTree', return_value=data): - shas = self._superproject._GetAllProjectsSHAs(url='localhost', branch='junk') - self.assertEqual(shas, { + commit_ids = self._superproject._GetAllProjectsCommitIds() + self.assertEqual(commit_ids, { 'art': '2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea', 'bootable/recovery': 'e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06', 'build/bazel': 'ade9b7a0d874e25fff4bf2552488825c6f111928' @@ -103,19 +127,12 @@ class SuperprojectTestCase(unittest.TestCase): def test_superproject_write_manifest_file(self): """Test with writing manifest to a file after setting revisionId.""" - manifest = 
self.getXmlManifest(""" - - - - - -""") - self.assertEqual(len(manifest.projects), 1) - project = manifest.projects[0] + self.assertEqual(len(self._superproject._manifest.projects), 1) + project = self._superproject._manifest.projects[0] project.SetRevisionId('ABCDEF') # Create temporary directory so that it can write the file. os.mkdir(self._superproject._superproject_path) - manifest_path = self._superproject._WriteManfiestFile(manifest) + manifest_path = self._superproject._WriteManfiestFile() self.assertIsNotNone(manifest_path) with open(manifest_path, 'r') as fp: manifest_xml = fp.read() @@ -124,29 +141,24 @@ class SuperprojectTestCase(unittest.TestCase): '' + '' + '' + - '' + + '' + + '' + '') def test_superproject_update_project_revision_id(self): """Test with LsTree being a mock.""" - manifest = self.getXmlManifest(""" - - - - - -""") - self.assertEqual(len(manifest.projects), 1) - projects = manifest.projects + self.assertEqual(len(self._superproject._manifest.projects), 1) + projects = self._superproject._manifest.projects data = ('160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00' '160000 commit e9d25da64d8d365dbba7c8ee00fe8c4473fe9a06\tbootable/recovery\x00') with mock.patch.object(self._superproject, '_Clone', return_value=True): with mock.patch.object(self._superproject, '_Fetch', return_value=True): - with mock.patch.object(self._superproject, '_LsTree', return_value=data): + with mock.patch.object(self._superproject, + '_LsTree', + return_value=data): # Create temporary directory so that it can write the file. os.mkdir(self._superproject._superproject_path) - manifest_path = self._superproject.UpdateProjectsRevisionId( - manifest, projects, url='localhost') + manifest_path = self._superproject.UpdateProjectsRevisionId(projects) self.assertIsNotNone(manifest_path) with open(manifest_path, 'r') as fp: manifest_xml = fp.read() @@ -157,6 +169,7 @@ class SuperprojectTestCase(unittest.TestCase): '' + '' + + '' + '')