def _ResetPathToProjectMap(self, projects):
    """Rebuild the worktree-path -> project lookup table from scratch.

    Args:
      projects: iterable of project objects exposing a `worktree` attribute.
    """
    path_map = {}
    for candidate in projects:
        path_map[candidate.worktree] = candidate
    self._by_path = path_map


def _UpdatePathToProjectMap(self, project):
    """Add one project to the lookup table, replacing any entry at its path.

    Args:
      project: project object exposing a `worktree` attribute.
    """
    self._by_path.update({project.worktree: project})


def _GetProjectByPath(self, path):
    """Find the project whose worktree is `path` or an ancestor of `path`.

    Args:
      path: path to resolve; it is compared verbatim against the worktree
        keys stored in the lookup table.

    Returns:
      The matching project, or None when nothing matches.
    """
    if not os.path.exists(path):
        # Nothing on disk to walk up from: only an exact worktree match counts.
        return self._by_path.get(path)

    # Climb towards the filesystem root.  Stop at the manifest top directory,
    # on an empty path, or when dirname() no longer changes the path.
    previous = None
    while path and path != previous and path != self.manifest.topdir:
        if path in self._by_path:
            return self._by_path[path]
        previous, path = path, os.path.dirname(path)
    return None
by_path[path] - break - except KeyError: - oldpath = path - path = os.path.dirname(path) - else: - try: - project = by_path[path] - except KeyError: - pass + # If it's not a derived project, update path->project mapping and + # search again, as arg might actually point to a derived subproject. + if (project and not project.Derived and + (submodules_ok or project.sync_s)): + search_again = False + for subproject in project.GetDerivedSubprojects(): + self._UpdatePathToProjectMap(subproject) + search_again = True + if search_again: + project = self._GetProjectByPath(path) or project if not project: raise NoSuchProjectError(arg) diff --git a/docs/manifest-format.txt b/docs/manifest-format.txt index a54282c8..f6dba640 100644 --- a/docs/manifest-format.txt +++ b/docs/manifest-format.txt @@ -41,17 +41,20 @@ following DTD: + - + + @@ -152,7 +155,10 @@ Element project One or more project elements may be specified. Each element describes a single Git repository to be cloned into the repo -client workspace. +client workspace. You may specify Git-submodules by creating a +nested project. Git-submodules will be automatically +recognized and inherit their parent's attributes, but those +may be overridden by an explicitly specified project element. Attribute `name`: A unique name for this project. The project's name is appended onto its remote's fetch URL to generate the actual @@ -163,7 +169,8 @@ URL to configure the Git remote with. The URL gets formed as: where ${remote_fetch} is the remote's fetch attribute and ${project_name} is the project's name attribute. The suffix ".git" is always appended as repo assumes the upstream is a forest of -bare Git repositories. +bare Git repositories. If the project has a parent element, its +name will be prefixed by the parent's. The project name must match the name Gerrit knows, if Gerrit is being used for code reviews. @@ -171,6 +178,8 @@ being used for code reviews. 
Attribute `path`: An optional path relative to the top directory of the repo client where the Git working directory for this project should be placed. If not supplied the project name is used. +If the project has a parent element, its path will be prefixed +by the parent's. Attribute `remote`: Name of a previously defined remote element. If not supplied the remote given by the default element is used. @@ -190,6 +199,8 @@ its name:`name` and path:`path`. E.g. for definition is implicitly in the following manifest groups: default, name:monkeys, and path:barrel-of. If you place a project in the group "notdefault", it will not be automatically downloaded by repo. +If the project has a parent element, the `name` and `path` here +are the prefixed ones. Element annotation ------------------ diff --git a/manifest_xml.py b/manifest_xml.py index 122393cf..36f8ef87 100644 --- a/manifest_xml.py +++ b/manifest_xml.py @@ -40,6 +40,7 @@ class _Default(object): remote = None sync_j = 1 sync_c = False + sync_s = False class _XmlRemote(object): def __init__(self, @@ -178,6 +179,9 @@ class XmlManifest(object): if d.sync_c: have_default = True e.setAttribute('sync-c', 'true') + if d.sync_s: + have_default = True + e.setAttribute('sync-s', 'true') if have_default: root.appendChild(e) root.appendChild(doc.createTextNode('')) @@ -188,20 +192,25 @@ class XmlManifest(object): root.appendChild(e) root.appendChild(doc.createTextNode('')) - sort_projects = list(self.projects.keys()) - sort_projects.sort() - - for p in sort_projects: - p = self.projects[p] + def output_projects(parent, parent_node, projects): + for p in projects: + output_project(parent, parent_node, self.projects[p]) + def output_project(parent, parent_node, p): if not p.MatchesGroups(groups): - continue + return + + name = p.name + relpath = p.relpath + if parent: + name = self._UnjoinName(parent.name, name) + relpath = self._UnjoinRelpath(parent.relpath, relpath) e = doc.createElement('project') - root.appendChild(e) - 
e.setAttribute('name', p.name) - if p.relpath != p.name: - e.setAttribute('path', p.relpath) + parent_node.appendChild(e) + e.setAttribute('name', name) + if relpath != name: + e.setAttribute('path', relpath) if not d.remote or p.remote.name != d.remote.name: e.setAttribute('remote', p.remote.name) if peg_rev: @@ -239,6 +248,19 @@ class XmlManifest(object): if p.sync_c: e.setAttribute('sync-c', 'true') + if p.sync_s: + e.setAttribute('sync-s', 'true') + + if p.subprojects: + sort_projects = [subp.name for subp in p.subprojects] + sort_projects.sort() + output_projects(p, e, sort_projects) + + sort_projects = [key for key in self.projects.keys() + if not self.projects[key].parent] + sort_projects.sort() + output_projects(None, root, sort_projects) + if self._repo_hooks_project: root.appendChild(doc.createTextNode('')) e = doc.createElement('repo-hooks') @@ -409,14 +431,19 @@ class XmlManifest(object): (self.manifestFile)) self._manifest_server = url + def recursively_add_projects(project): + if self._projects.get(project.name): + raise ManifestParseError( + 'duplicate project %s in %s' % + (project.name, self.manifestFile)) + self._projects[project.name] = project + for subproject in project.subprojects: + recursively_add_projects(subproject) + for node in itertools.chain(*node_list): if node.nodeName == 'project': project = self._ParseProject(node) - if self._projects.get(project.name): - raise ManifestParseError( - 'duplicate project %s in %s' % - (project.name, self.manifestFile)) - self._projects[project.name] = project + recursively_add_projects(project) if node.nodeName == 'repo-hooks': # Get the name of the project and the (space-separated) list of enabled. 
def _JoinName(self, parent_name, name):
    """Return the full project name of `name` nested under `parent_name`."""
    full_name = os.path.join(parent_name, name)
    return full_name


def _UnjoinName(self, parent_name, name):
    """Return `name` expressed relative to `parent_name`."""
    short_name = os.path.relpath(name, parent_name)
    return short_name


def GetProjectPaths(self, name, path):
    """Compute the on-disk locations of a top-level project.

    Args:
      name: the project's name.
      path: the project's path relative to the top directory.

    Returns:
      A (relpath, worktree, gitdir) tuple.  In a mirror there is no working
      tree, so worktree is None and the git directory sits under the top
      directory, named after the project.
    """
    if self.IsMirror:
        return path, None, os.path.join(self.topdir, '%s.git' % name)

    # Worktree paths always use forward slashes.
    worktree = os.path.join(self.topdir, path).replace('\\', '/')
    gitdir = os.path.join(self.repodir, 'projects', '%s.git' % path)
    return path, worktree, gitdir


def GetSubprojectName(self, parent, submodule_path):
    """Return the project name for the submodule at `submodule_path`."""
    subproject_name = os.path.join(parent.name, submodule_path)
    return subproject_name


def _JoinRelpath(self, parent_relpath, relpath):
    """Return `relpath` placed underneath `parent_relpath`."""
    joined = os.path.join(parent_relpath, relpath)
    return joined


def _UnjoinRelpath(self, parent_relpath, relpath):
    """Return `relpath` expressed relative to `parent_relpath`."""
    relative = os.path.relpath(relpath, parent_relpath)
    return relative


def GetSubprojectPaths(self, parent, path):
    """Compute the on-disk locations of a project nested inside `parent`.

    Args:
      parent: the enclosing project; its relpath, gitdir and worktree are read.
      path: the subproject's path relative to the parent.

    Returns:
      A (relpath, worktree, gitdir) tuple.  The git directory is placed in the
      parent's git directory under 'subprojects'; worktree is None in a mirror.
    """
    relpath = self._JoinRelpath(parent.relpath, path)
    gitdir = os.path.join(parent.gitdir, 'subprojects', '%s.git' % path)
    worktree = None
    if not self.IsMirror:
        worktree = os.path.join(parent.worktree, path).replace('\\', '/')
    return relpath, worktree, gitdir
+ parent = None, + is_derived = False): + """Init a Project object. + + Args: + manifest: The XmlManifest object. + name: The `name` attribute of manifest.xml's project element. + remote: RemoteSpec object specifying its remote's properties. + gitdir: Absolute path of git directory. + worktree: Absolute path of git working tree. + relpath: Relative path of git working tree to repo's top directory. + revisionExpr: The `revision` attribute of manifest.xml's project element. + revisionId: git commit id for checking out. + rebase: The `rebase` attribute of manifest.xml's project element. + groups: The `groups` attribute of manifest.xml's project element. + sync_c: The `sync-c` attribute of manifest.xml's project element. + sync_s: The `sync-s` attribute of manifest.xml's project element. + upstream: The `upstream` attribute of manifest.xml's project element. + parent: The parent Project object. + is_derived: False if the project was explicitly defined in the manifest; + True if the project is a discovered submodule. + """ self.manifest = manifest self.name = name self.remote = remote @@ -508,7 +532,11 @@ class Project(object): self.rebase = rebase self.groups = groups self.sync_c = sync_c + self.sync_s = sync_s self.upstream = upstream + self.parent = parent + self.is_derived = is_derived + self.subprojects = [] self.snapshots = {} self.copyfiles = [] @@ -528,6 +556,10 @@ class Project(object): # project containing repo hooks. 
## Submodule Management ##

def GetRegisteredSubprojects(self):
    """Return every subproject registered under this project.

    Returns:
      A flat list.  At each level the direct children come first, followed by
      the descendants of each child in turn.
    """
    result = []

    def collect(subprojects):
        if not subprojects:
            return
        result.extend(subprojects)
        for child in subprojects:
            collect(child.subprojects)

    collect(self.subprojects)
    return result


def _ParseGitmodulesListing(lines):
    """Extract submodule paths and URLs from `git config --list` output.

    Keys are split by prefix and suffix rather than matched with a `[^.]+`
    pattern, so submodule names that contain dots (e.g. "libs/v1.2") are kept.

    Args:
      lines: iterable of "key=value" strings.

    Returns:
      A (paths, urls) pair of parallel lists ordered by submodule name; a
      missing path or URL is represented by ''.
    """
    prefix = 'submodule.'
    paths = {}
    urls = {}
    for line in lines:
        key, sep, value = line.partition('=')
        if not sep or not key.startswith(prefix):
            continue
        for suffix, table in (('.path', paths), ('.url', urls)):
            if key.endswith(suffix):
                name = key[len(prefix):-len(suffix)]
                if name:
                    table[name] = value
                break
    names = sorted(set(paths) | set(urls))
    return ([paths.get(name, '') for name in names],
            [urls.get(name, '') for name in names])


def _ParseLsTreeOutput(output):
    """Map entry path -> object id from `git ls-tree -z` output.

    Each NUL-terminated record is "<mode> <type> <object>\\t<path>".  Splitting
    on the tab keeps paths that contain whitespace intact.

    Args:
      output: the captured stdout of `git ls-tree -z`.

    Returns:
      A dict; empty when there are no well-formed records.
    """
    objects = {}
    for record in output.split('\0'):
        meta, tab, path = record.partition('\t')
        fields = meta.split()
        if not tab or len(fields) < 3:
            continue
        objects[path] = fields[2]
    return objects


def _GetSubmodules(self):
    """List the submodules recorded at this project's current revision.

    Returns:
      A list of (revision, path, url) tuples; empty when the revision cannot
      be resolved, there is no readable .gitmodules, or any git call fails.
    """
    # Unfortunately we cannot call `git submodule status --recursive` here
    # because the working tree might not exist yet, and it cannot be used
    # without a working tree in its current implementation.

    def run_git(gitdir, cmd):
        """Run a bare git command; return its stdout, or None on failure."""
        try:
            p = GitCommand(None, cmd, capture_stdout=True, capture_stderr=True,
                           bare=True, gitdir=gitdir)
            if p.Wait() != 0:
                return None
        except GitError:
            return None
        return p.stdout

    def parse_gitmodules(gitdir, rev):
        """Return (paths, urls) parsed from .gitmodules at `rev`."""
        blob = run_git(gitdir, ['cat-file', 'blob', '%s:.gitmodules' % rev])
        if blob is None:
            return [], []
        # `git config --file` needs a real file, so spill the blob to disk.
        fd, temp_gitmodules_path = tempfile.mkstemp()
        try:
            try:
                os.write(fd, blob)
            finally:
                # Close even if the write fails so the descriptor never leaks.
                os.close(fd)
            listing = run_git(
                gitdir, ['config', '--file', temp_gitmodules_path, '--list'])
        finally:
            os.remove(temp_gitmodules_path)
        if listing is None:
            return [], []
        return _ParseGitmodulesListing(listing.split('\n'))

    def git_ls_tree(gitdir, rev, paths):
        """Return {path: object id} for `paths` at `rev`; {} on failure."""
        # -z: NUL-terminated records and unquoted file names.
        output = run_git(gitdir, ['ls-tree', '-z', rev, '--'] + list(paths))
        if output is None:
            # Must be a dict: the caller looks entries up by path.
            return {}
        return _ParseLsTreeOutput(output)

    def get_submodules(gitdir, rev):
        sub_paths, sub_urls = parse_gitmodules(gitdir, rev)
        # An entry without a path cannot be located in the tree; drop it
        # instead of handing git an empty pathspec.
        entries = [(sub_path, sub_url)
                   for sub_path, sub_url in zip(sub_paths, sub_urls)
                   if sub_path]
        if not entries:
            return []
        # The object id of a submodule entry in the tree is the revision of
        # the submodule repository.
        sub_revs = git_ls_tree(
            gitdir, rev, [sub_path for sub_path, _ in entries])
        # Ignore submodules that are declared but absent from the tree.
        return [(sub_revs[sub_path], sub_path, sub_url)
                for sub_path, sub_url in entries
                if sub_path in sub_revs]

    try:
        rev = self.GetRevisionId()
    except GitError:
        return []
    return get_submodules(self.gitdir, rev)


def GetDerivedSubprojects(self):
    """Return projects for submodules not explicitly listed in the manifest.

    Each submodule of this project is looked up in the manifest by name.  A
    submodule the manifest already defines is not added again, but its own
    derived subprojects are.  Any other submodule gets a new Project marked
    is_derived=True that inherits this project's remote name, review setting,
    revision expression, rebase, groups, sync_c and sync_s; the walk then
    recurses into it.

    Returns:
      A flat list of derived Project objects (possibly empty).
    """
    result = []
    if not self.Exists:
        # If git repo does not exist yet, querying its submodules will
        # mess up its states; so return here.
        return result
    for rev, path, url in self._GetSubmodules():
        name = self.manifest.GetSubprojectName(self, path)
        project = self.manifest.projects.get(name)
        if project:
            result.extend(project.GetDerivedSubprojects())
            continue
        relpath, worktree, gitdir = self.manifest.GetSubprojectPaths(self, path)
        remote = RemoteSpec(self.remote.name,
                            url=url,
                            review=self.remote.review)
        subproject = Project(manifest=self.manifest,
                             name=name,
                             remote=remote,
                             gitdir=gitdir,
                             worktree=worktree,
                             relpath=relpath,
                             revisionExpr=self.revisionExpr,
                             revisionId=rev,
                             rebase=self.rebase,
                             groups=self.groups,
                             sync_c=self.sync_c,
                             sync_s=self.sync_s,
                             parent=self,
                             is_derived=True)
        result.append(subproject)
        result.extend(subproject.GetDerivedSubprojects())
    return result
self.manifest._Unload() if opt.jobs is None: self.jobs = self.manifest.default.sync_j - all_projects = self.GetProjects(args, missing_ok=True) + all_projects = self.GetProjects(args, + missing_ok=True, + submodules_ok=opt.fetch_submodules) self._fetch_times = _FetchTimes(self.manifest) if not opt.local_only: @@ -570,12 +578,33 @@ later is required to fix a server side protocol bug. to_fetch.extend(all_projects) to_fetch.sort(key=self._fetch_times.Get, reverse=True) - self._Fetch(to_fetch, opt) + fetched = self._Fetch(to_fetch, opt) _PostRepoFetch(rp, opt.no_repo_verify) if opt.network_only: # bail out now; the rest touches the working tree return + # Iteratively fetch missing and/or nested unregistered submodules + previously_missing_set = set() + while True: + self.manifest._Unload() + all_projects = self.GetProjects(args, + missing_ok=True, + submodules_ok=opt.fetch_submodules) + missing = [] + for project in all_projects: + if project.gitdir not in fetched: + missing.append(project) + if not missing: + break + # Stop us from non-stopped fetching actually-missing repos: If set of + # missing repos has not been changed from last fetch, we break. + missing_set = set(p.name for p in missing) + if previously_missing_set == missing_set: + break + previously_missing_set = missing_set + fetched.update(self._Fetch(missing, opt)) + if self.manifest.IsMirror: # bail out now, we have no working tree return