# Copyright (C) 2008 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import collections import itertools import os import platform import re import sys import xml.dom.minidom import urllib.parse import gitc_utils from git_config import GitConfig, IsId from git_refs import R_HEADS, HEAD import platform_utils from project import RemoteSpec, Project, MetaProject from error import (ManifestParseError, ManifestInvalidPathError, ManifestInvalidRevisionError) from wrapper import Wrapper MANIFEST_FILE_NAME = 'manifest.xml' LOCAL_MANIFEST_NAME = 'local_manifest.xml' LOCAL_MANIFESTS_DIR_NAME = 'local_manifests' # Add all projects from local manifest into a group. LOCAL_MANIFEST_GROUP_PREFIX = 'local:' # ContactInfo has the self-registered bug url, supplied by the manifest authors. ContactInfo = collections.namedtuple('ContactInfo', 'bugurl') # urljoin gets confused if the scheme is not known. urllib.parse.uses_relative.extend([ 'ssh', 'git', 'persistent-https', 'sso', 'rpc']) urllib.parse.uses_netloc.extend([ 'ssh', 'git', 'persistent-https', 'sso', 'rpc']) def XmlBool(node, attr, default=None): """Determine boolean value of |node|'s |attr|. Invalid values will issue a non-fatal warning. Args: node: XML node whose attributes we access. attr: The attribute to access. default: If the attribute is not set (value is empty), then use this. Returns: True if the attribute is a valid string representing true. False if the attribute is a valid string representing false. |default| otherwise. """ value = node.getAttribute(attr) s = value.lower() if s == '': return default elif s in {'yes', 'true', '1'}: return True elif s in {'no', 'false', '0'}: return False else: print('warning: manifest: %s="%s": ignoring invalid XML boolean' % (attr, value), file=sys.stderr) return default def XmlInt(node, attr, default=None): """Determine integer value of |node|'s |attr|. Args: node: XML node whose attributes we access. attr: The attribute to access. default: If the attribute is not set (value is empty), then use this. Returns: The number if the attribute is a valid number. Raises: ManifestParseError: The number is invalid. """ value = node.getAttribute(attr) if not value: return default try: return int(value) except ValueError: raise ManifestParseError('manifest: invalid %s="%s" integer' % (attr, value)) class _Default(object): """Project defaults within the manifest.""" revisionExpr = None destBranchExpr = None upstreamExpr = None remote = None sync_j = 1 sync_c = False sync_s = False sync_tags = True def __eq__(self, other): if not isinstance(other, _Default): return False return self.__dict__ == other.__dict__ def __ne__(self, other): if not isinstance(other, _Default): return True return self.__dict__ != other.__dict__ class _XmlRemote(object): def __init__(self, name, alias=None, fetch=None, pushUrl=None, manifestUrl=None, review=None, revision=None): self.name = name self.fetchUrl = fetch self.pushUrl = pushUrl self.manifestUrl = manifestUrl self.remoteAlias = alias self.reviewUrl = review self.revision = revision self.resolvedFetchUrl = self._resolveFetchUrl() def __eq__(self, other): if not isinstance(other, _XmlRemote): return False return self.__dict__ == other.__dict__ def __ne__(self, other): if not isinstance(other, _XmlRemote): return True return self.__dict__ != other.__dict__ def _resolveFetchUrl(self): if self.fetchUrl is None: return '' url = self.fetchUrl.rstrip('/') manifestUrl = self.manifestUrl.rstrip('/') # urljoin will gets confused over quite a few things. The ones we care # about here are: # * no scheme in the base url, like <hostname:port> # We handle no scheme by replacing it with an obscure protocol, gopher # and then replacing it with the original when we are done. if manifestUrl.find(':') != manifestUrl.find('/') - 1: url = urllib.parse.urljoin('gopher://' + manifestUrl, url) url = re.sub(r'^gopher://', '', url) else: url = urllib.parse.urljoin(manifestUrl, url) return url def ToRemoteSpec(self, projectName): fetchUrl = self.resolvedFetchUrl.rstrip('/') url = fetchUrl + '/' + projectName remoteName = self.name if self.remoteAlias: remoteName = self.remoteAlias return RemoteSpec(remoteName, url=url, pushUrl=self.pushUrl, review=self.reviewUrl, orig_name=self.name, fetchUrl=self.fetchUrl) class XmlManifest(object): """manages the repo configuration file""" def __init__(self, repodir, manifest_file, local_manifests=None): """Initialize. Args: repodir: Path to the .repo/ dir for holding all internal checkout state. It must be in the top directory of the repo client checkout. manifest_file: Full path to the manifest file to parse. This will usually be |repodir|/|MANIFEST_FILE_NAME|. local_manifests: Full path to the directory of local override manifests. This will usually be |repodir|/|LOCAL_MANIFESTS_DIR_NAME|. """ # TODO(vapier): Move this out of this class. self.globalConfig = GitConfig.ForUser() self.repodir = os.path.abspath(repodir) self.topdir = os.path.dirname(self.repodir) self.manifestFile = manifest_file self.local_manifests = local_manifests self._load_local_manifests = True self.repoProject = MetaProject(self, 'repo', gitdir=os.path.join(repodir, 'repo/.git'), worktree=os.path.join(repodir, 'repo')) mp = MetaProject(self, 'manifests', gitdir=os.path.join(repodir, 'manifests.git'), worktree=os.path.join(repodir, 'manifests')) self.manifestProject = mp # This is a bit hacky, but we're in a chicken & egg situation: all the # normal repo settings live in the manifestProject which we just setup # above, so we couldn't easily query before that. We assume Project() # init doesn't care if this changes afterwards. if os.path.exists(mp.gitdir) and mp.config.GetBoolean('repo.worktree'): mp.use_git_worktrees = True self._Unload() def Override(self, name, load_local_manifests=True): """Use a different manifest, just for the current instantiation. """ path = None # Look for a manifest by path in the filesystem (including the cwd). if not load_local_manifests: local_path = os.path.abspath(name) if os.path.isfile(local_path): path = local_path # Look for manifests by name from the manifests repo. if path is None: path = os.path.join(self.manifestProject.worktree, name) if not os.path.isfile(path): raise ManifestParseError('manifest %s not found' % name) old = self.manifestFile try: self._load_local_manifests = load_local_manifests self.manifestFile = path self._Unload() self._Load() finally: self.manifestFile = old def Link(self, name): """Update the repo metadata to use a different manifest. """ self.Override(name) # Old versions of repo would generate symlinks we need to clean up. if os.path.lexists(self.manifestFile): platform_utils.remove(self.manifestFile) # This file is interpreted as if it existed inside the manifest repo. # That allows us to use <include> with the relative file name. with open(self.manifestFile, 'w') as fp: fp.write("""<?xml version="1.0" encoding="UTF-8"?> <!-- DO NOT EDIT THIS FILE! It is generated by repo and changes will be discarded. If you want to use a different manifest, use `repo init -m <file>` instead. If you want to customize your checkout by overriding manifest settings, use the local_manifests/ directory instead. For more information on repo manifests, check out: https://gerrit.googlesource.com/git-repo/+/HEAD/docs/manifest-format.md --> <manifest> <include name="%s" /> </manifest> """ % (name,)) def _RemoteToXml(self, r, doc, root): e = doc.createElement('remote') root.appendChild(e) e.setAttribute('name', r.name) e.setAttribute('fetch', r.fetchUrl) if r.pushUrl is not None: e.setAttribute('pushurl', r.pushUrl) if r.remoteAlias is not None: e.setAttribute('alias', r.remoteAlias) if r.reviewUrl is not None: e.setAttribute('review', r.reviewUrl) if r.revision is not None: e.setAttribute('revision', r.revision) def _ParseList(self, field): """Parse fields that contain flattened lists. These are whitespace & comma separated. Empty elements will be discarded. """ return [x for x in re.split(r'[,\s]+', field) if x] def ToXml(self, peg_rev=False, peg_rev_upstream=True, peg_rev_dest_branch=True, groups=None): """Return the current manifest XML.""" mp = self.manifestProject if groups is None: groups = mp.config.GetString('manifest.groups') if groups: groups = self._ParseList(groups) doc = xml.dom.minidom.Document() root = doc.createElement('manifest') doc.appendChild(root) # Save out the notice. There's a little bit of work here to give it the # right whitespace, which assumes that the notice is automatically indented # by 4 by minidom. if self.notice: notice_element = root.appendChild(doc.createElement('notice')) notice_lines = self.notice.splitlines() indented_notice = ('\n'.join(" " * 4 + line for line in notice_lines))[4:] notice_element.appendChild(doc.createTextNode(indented_notice)) d = self.default for r in sorted(self.remotes): self._RemoteToXml(self.remotes[r], doc, root) if self.remotes: root.appendChild(doc.createTextNode('')) have_default = False e = doc.createElement('default') if d.remote: have_default = True e.setAttribute('remote', d.remote.name) if d.revisionExpr: have_default = True e.setAttribute('revision', d.revisionExpr) if d.destBranchExpr: have_default = True e.setAttribute('dest-branch', d.destBranchExpr) if d.upstreamExpr: have_default = True e.setAttribute('upstream', d.upstreamExpr) if d.sync_j > 1: have_default = True e.setAttribute('sync-j', '%d' % d.sync_j) if d.sync_c: have_default = True e.setAttribute('sync-c', 'true') if d.sync_s: have_default = True e.setAttribute('sync-s', 'true') if not d.sync_tags: have_default = True e.setAttribute('sync-tags', 'false') if have_default: root.appendChild(e) root.appendChild(doc.createTextNode('')) if self._manifest_server: e = doc.createElement('manifest-server') e.setAttribute('url', self._manifest_server) root.appendChild(e) root.appendChild(doc.createTextNode('')) def output_projects(parent, parent_node, projects): for project_name in projects: for project in self._projects[project_name]: output_project(parent, parent_node, project) def output_project(parent, parent_node, p): if not p.MatchesGroups(groups): return name = p.name relpath = p.relpath if parent: name = self._UnjoinName(parent.name, name) relpath = self._UnjoinRelpath(parent.relpath, relpath) e = doc.createElement('project') parent_node.appendChild(e) e.setAttribute('name', name) if relpath != name: e.setAttribute('path', relpath) remoteName = None if d.remote: remoteName = d.remote.name if not d.remote or p.remote.orig_name != remoteName: remoteName = p.remote.orig_name e.setAttribute('remote', remoteName) if peg_rev: if self.IsMirror: value = p.bare_git.rev_parse(p.revisionExpr + '^0') else: value = p.work_git.rev_parse(HEAD + '^0') e.setAttribute('revision', value) if peg_rev_upstream: if p.upstream: e.setAttribute('upstream', p.upstream) elif value != p.revisionExpr: # Only save the origin if the origin is not a sha1, and the default # isn't our value e.setAttribute('upstream', p.revisionExpr) if peg_rev_dest_branch: if p.dest_branch: e.setAttribute('dest-branch', p.dest_branch) elif value != p.revisionExpr: e.setAttribute('dest-branch', p.revisionExpr) else: revision = self.remotes[p.remote.orig_name].revision or d.revisionExpr if not revision or revision != p.revisionExpr: e.setAttribute('revision', p.revisionExpr) elif p.revisionId: e.setAttribute('revision', p.revisionId) if (p.upstream and (p.upstream != p.revisionExpr or p.upstream != d.upstreamExpr)): e.setAttribute('upstream', p.upstream) if p.dest_branch and p.dest_branch != d.destBranchExpr: e.setAttribute('dest-branch', p.dest_branch) for c in p.copyfiles: ce = doc.createElement('copyfile') ce.setAttribute('src', c.src) ce.setAttribute('dest', c.dest) e.appendChild(ce) for l in p.linkfiles: le = doc.createElement('linkfile') le.setAttribute('src', l.src) le.setAttribute('dest', l.dest) e.appendChild(le) default_groups = ['all', 'name:%s' % p.name, 'path:%s' % p.relpath] egroups = [g for g in p.groups if g not in default_groups] if egroups: e.setAttribute('groups', ','.join(egroups)) for a in p.annotations: if a.keep == "true": ae = doc.createElement('annotation') ae.setAttribute('name', a.name) ae.setAttribute('value', a.value) e.appendChild(ae) if p.sync_c: e.setAttribute('sync-c', 'true') if p.sync_s: e.setAttribute('sync-s', 'true') if not p.sync_tags: e.setAttribute('sync-tags', 'false') if p.clone_depth: e.setAttribute('clone-depth', str(p.clone_depth)) self._output_manifest_project_extras(p, e) if p.subprojects: subprojects = set(subp.name for subp in p.subprojects) output_projects(p, e, list(sorted(subprojects))) projects = set(p.name for p in self._paths.values() if not p.parent) output_projects(None, root, list(sorted(projects))) if self._repo_hooks_project: root.appendChild(doc.createTextNode('')) e = doc.createElement('repo-hooks') e.setAttribute('in-project', self._repo_hooks_project.name) e.setAttribute('enabled-list', ' '.join(self._repo_hooks_project.enabled_repo_hooks)) root.appendChild(e) if self._superproject: root.appendChild(doc.createTextNode('')) e = doc.createElement('superproject') e.setAttribute('name', self._superproject['name']) remoteName = None if d.remote: remoteName = d.remote.name remote = self._superproject.get('remote') if not d.remote or remote.orig_name != remoteName: remoteName = remote.orig_name e.setAttribute('remote', remoteName) root.appendChild(e) if self._contactinfo.bugurl != Wrapper().BUG_URL: root.appendChild(doc.createTextNode('')) e = doc.createElement('contactinfo') e.setAttribute('bugurl', self._contactinfo.bugurl) root.appendChild(e) return doc def ToDict(self, **kwargs): """Return the current manifest as a dictionary.""" # Elements that may only appear once. SINGLE_ELEMENTS = { 'notice', 'default', 'manifest-server', 'repo-hooks', 'superproject', 'contactinfo', } # Elements that may be repeated. MULTI_ELEMENTS = { 'remote', 'remove-project', 'project', 'extend-project', 'include', # These are children of 'project' nodes. 'annotation', 'project', 'copyfile', 'linkfile', } doc = self.ToXml(**kwargs) ret = {} def append_children(ret, node): for child in node.childNodes: if child.nodeType == xml.dom.Node.ELEMENT_NODE: attrs = child.attributes element = dict((attrs.item(i).localName, attrs.item(i).value) for i in range(attrs.length)) if child.nodeName in SINGLE_ELEMENTS: ret[child.nodeName] = element elif child.nodeName in MULTI_ELEMENTS: ret.setdefault(child.nodeName, []).append(element) else: raise ManifestParseError('Unhandled element "%s"' % (child.nodeName,)) append_children(element, child) append_children(ret, doc.firstChild) return ret def Save(self, fd, **kwargs): """Write the current manifest out to the given file descriptor.""" doc = self.ToXml(**kwargs) doc.writexml(fd, '', ' ', '\n', 'UTF-8') def _output_manifest_project_extras(self, p, e): """Manifests can modify e if they support extra project attributes.""" @property def paths(self): self._Load() return self._paths @property def projects(self): self._Load() return list(self._paths.values()) @property def remotes(self): self._Load() return self._remotes @property def default(self): self._Load() return self._default @property def repo_hooks_project(self): self._Load() return self._repo_hooks_project @property def superproject(self): self._Load() return self._superproject @property def contactinfo(self): self._Load() return self._contactinfo @property def notice(self): self._Load() return self._notice @property def manifest_server(self): self._Load() return self._manifest_server @property def CloneBundle(self): clone_bundle = self.manifestProject.config.GetBoolean('repo.clonebundle') if clone_bundle is None: return False if self.manifestProject.config.GetBoolean('repo.partialclone') else True else: return clone_bundle @property def CloneFilter(self): if self.manifestProject.config.GetBoolean('repo.partialclone'): return self.manifestProject.config.GetString('repo.clonefilter') return None @property def PartialCloneExclude(self): exclude = self.manifest.manifestProject.config.GetString( 'repo.partialcloneexclude') or '' return set(x.strip() for x in exclude.split(',')) @property def HasLocalManifests(self): return self._load_local_manifests and self.local_manifests @property def IsMirror(self): return self.manifestProject.config.GetBoolean('repo.mirror') @property def UseGitWorktrees(self): return self.manifestProject.config.GetBoolean('repo.worktree') @property def IsArchive(self): return self.manifestProject.config.GetBoolean('repo.archive') @property def HasSubmodules(self): return self.manifestProject.config.GetBoolean('repo.submodules') def GetDefaultGroupsStr(self): """Returns the default group string for the platform.""" return 'default,platform-' + platform.system().lower() def GetGroupsStr(self): """Returns the manifest group string that should be synced.""" groups = self.manifestProject.config.GetString('manifest.groups') if not groups: groups = self.GetDefaultGroupsStr() return groups def _Unload(self): self._loaded = False self._projects = {} self._paths = {} self._remotes = {} self._default = None self._repo_hooks_project = None self._superproject = {} self._contactinfo = ContactInfo(Wrapper().BUG_URL) self._notice = None self.branch = None self._manifest_server = None def _Load(self): if not self._loaded: m = self.manifestProject b = m.GetBranch(m.CurrentBranch).merge if b is not None and b.startswith(R_HEADS): b = b[len(R_HEADS):] self.branch = b # The manifestFile was specified by the user which is why we allow include # paths to point anywhere. nodes = [] nodes.append(self._ParseManifestXml( self.manifestFile, self.manifestProject.worktree, restrict_includes=False)) if self._load_local_manifests and self.local_manifests: try: for local_file in sorted(platform_utils.listdir(self.local_manifests)): if local_file.endswith('.xml'): local = os.path.join(self.local_manifests, local_file) # Since local manifests are entirely managed by the user, allow # them to point anywhere the user wants. nodes.append(self._ParseManifestXml( local, self.repodir, parent_groups=f'{LOCAL_MANIFEST_GROUP_PREFIX}:{local_file[:-4]}', restrict_includes=False)) except OSError: pass try: self._ParseManifest(nodes) except ManifestParseError as e: # There was a problem parsing, unload ourselves in case they catch # this error and try again later, we will show the correct error self._Unload() raise e if self.IsMirror: self._AddMetaProjectMirror(self.repoProject) self._AddMetaProjectMirror(self.manifestProject) self._loaded = True def _ParseManifestXml(self, path, include_root, parent_groups='', restrict_includes=True): """Parse a manifest XML and return the computed nodes. Args: path: The XML file to read & parse. include_root: The path to interpret include "name"s relative to. parent_groups: The groups to apply to this projects. restrict_includes: Whether to constrain the "name" attribute of includes. Returns: List of XML nodes. """ try: root = xml.dom.minidom.parse(path) except (OSError, xml.parsers.expat.ExpatError) as e: raise ManifestParseError("error parsing manifest %s: %s" % (path, e)) if not root or not root.childNodes: raise ManifestParseError("no root node in %s" % (path,)) for manifest in root.childNodes: if manifest.nodeName == 'manifest': break else: raise ManifestParseError("no <manifest> in %s" % (path,)) nodes = [] for node in manifest.childNodes: if node.nodeName == 'include': name = self._reqatt(node, 'name') if restrict_includes: msg = self._CheckLocalPath(name) if msg: raise ManifestInvalidPathError( '<include> invalid "name": %s: %s' % (name, msg)) include_groups = '' if parent_groups: include_groups = parent_groups if node.hasAttribute('groups'): include_groups = node.getAttribute('groups') + ',' + include_groups fp = os.path.join(include_root, name) if not os.path.isfile(fp): raise ManifestParseError("include [%s/]%s doesn't exist or isn't a file" % (include_root, name)) try: nodes.extend(self._ParseManifestXml(fp, include_root, include_groups)) # should isolate this to the exact exception, but that's # tricky. actual parsing implementation may vary. except (KeyboardInterrupt, RuntimeError, SystemExit, ManifestParseError): raise except Exception as e: raise ManifestParseError( "failed parsing included manifest %s: %s" % (name, e)) else: if parent_groups and node.nodeName == 'project': nodeGroups = parent_groups if node.hasAttribute('groups'): nodeGroups = node.getAttribute('groups') + ',' + nodeGroups node.setAttribute('groups', nodeGroups) nodes.append(node) return nodes def _ParseManifest(self, node_list): for node in itertools.chain(*node_list): if node.nodeName == 'remote': remote = self._ParseRemote(node) if remote: if remote.name in self._remotes: if remote != self._remotes[remote.name]: raise ManifestParseError( 'remote %s already exists with different attributes' % (remote.name)) else: self._remotes[remote.name] = remote for node in itertools.chain(*node_list): if node.nodeName == 'default': new_default = self._ParseDefault(node) emptyDefault = not node.hasAttributes() and not node.hasChildNodes() if self._default is None: self._default = new_default elif not emptyDefault and new_default != self._default: raise ManifestParseError('duplicate default in %s' % (self.manifestFile)) if self._default is None: self._default = _Default() for node in itertools.chain(*node_list): if node.nodeName == 'notice': if self._notice is not None: raise ManifestParseError( 'duplicate notice in %s' % (self.manifestFile)) self._notice = self._ParseNotice(node) for node in itertools.chain(*node_list): if node.nodeName == 'manifest-server': url = self._reqatt(node, 'url') if self._manifest_server is not None: raise ManifestParseError( 'duplicate manifest-server in %s' % (self.manifestFile)) self._manifest_server = url def recursively_add_projects(project): projects = self._projects.setdefault(project.name, []) if project.relpath is None: raise ManifestParseError( 'missing path for %s in %s' % (project.name, self.manifestFile)) if project.relpath in self._paths: raise ManifestParseError( 'duplicate path %s in %s' % (project.relpath, self.manifestFile)) self._paths[project.relpath] = project projects.append(project) for subproject in project.subprojects: recursively_add_projects(subproject) for node in itertools.chain(*node_list): if node.nodeName == 'project': project = self._ParseProject(node) recursively_add_projects(project) if node.nodeName == 'extend-project': name = self._reqatt(node, 'name') if name not in self._projects: raise ManifestParseError('extend-project element specifies non-existent ' 'project: %s' % name) path = node.getAttribute('path') groups = node.getAttribute('groups') if groups: groups = self._ParseList(groups) revision = node.getAttribute('revision') remote = node.getAttribute('remote') if remote: remote = self._get_remote(node) for p in self._projects[name]: if path and p.relpath != path: continue if groups: p.groups.extend(groups) if revision: p.revisionExpr = revision if IsId(revision): p.revisionId = revision else: p.revisionId = None if remote: p.remote = remote.ToRemoteSpec(name) if node.nodeName == 'repo-hooks': # Get the name of the project and the (space-separated) list of enabled. repo_hooks_project = self._reqatt(node, 'in-project') enabled_repo_hooks = self._ParseList(self._reqatt(node, 'enabled-list')) # Only one project can be the hooks project if self._repo_hooks_project is not None: raise ManifestParseError( 'duplicate repo-hooks in %s' % (self.manifestFile)) # Store a reference to the Project. try: repo_hooks_projects = self._projects[repo_hooks_project] except KeyError: raise ManifestParseError( 'project %s not found for repo-hooks' % (repo_hooks_project)) if len(repo_hooks_projects) != 1: raise ManifestParseError( 'internal error parsing repo-hooks in %s' % (self.manifestFile)) self._repo_hooks_project = repo_hooks_projects[0] # Store the enabled hooks in the Project object. self._repo_hooks_project.enabled_repo_hooks = enabled_repo_hooks if node.nodeName == 'superproject': name = self._reqatt(node, 'name') # There can only be one superproject. if self._superproject.get('name'): raise ManifestParseError( 'duplicate superproject in %s' % (self.manifestFile)) self._superproject['name'] = name remote_name = node.getAttribute('remote') if not remote_name: remote = self._default.remote else: remote = self._get_remote(node) if remote is None: raise ManifestParseError("no remote for superproject %s within %s" % (name, self.manifestFile)) self._superproject['remote'] = remote.ToRemoteSpec(name) if node.nodeName == 'contactinfo': bugurl = self._reqatt(node, 'bugurl') # This element can be repeated, later entries will clobber earlier ones. self._contactinfo = ContactInfo(bugurl) if node.nodeName == 'remove-project': name = self._reqatt(node, 'name') if name in self._projects: for p in self._projects[name]: del self._paths[p.relpath] del self._projects[name] # If the manifest removes the hooks project, treat it as if it deleted # the repo-hooks element too. if self._repo_hooks_project and (self._repo_hooks_project.name == name): self._repo_hooks_project = None elif not XmlBool(node, 'optional', False): raise ManifestParseError('remove-project element specifies non-existent ' 'project: %s' % name) def _AddMetaProjectMirror(self, m): name = None m_url = m.GetRemote(m.remote.name).url if m_url.endswith('/.git'): raise ManifestParseError('refusing to mirror %s' % m_url) if self._default and self._default.remote: url = self._default.remote.resolvedFetchUrl if not url.endswith('/'): url += '/' if m_url.startswith(url): remote = self._default.remote name = m_url[len(url):] if name is None: s = m_url.rindex('/') + 1 manifestUrl = self.manifestProject.config.GetString('remote.origin.url') remote = _XmlRemote('origin', fetch=m_url[:s], manifestUrl=manifestUrl) name = m_url[s:] if name.endswith('.git'): name = name[:-4] if name not in self._projects: m.PreSync() gitdir = os.path.join(self.topdir, '%s.git' % name) project = Project(manifest=self, name=name, remote=remote.ToRemoteSpec(name), gitdir=gitdir, objdir=gitdir, worktree=None, relpath=name or None, revisionExpr=m.revisionExpr, revisionId=None) self._projects[project.name] = [project] self._paths[project.relpath] = project def _ParseRemote(self, node): """ reads a <remote> element from the manifest file """ name = self._reqatt(node, 'name') alias = node.getAttribute('alias') if alias == '': alias = None fetch = self._reqatt(node, 'fetch') pushUrl = node.getAttribute('pushurl') if pushUrl == '': pushUrl = None review = node.getAttribute('review') if review == '': review = None revision = node.getAttribute('revision') if revision == '': revision = None manifestUrl = self.manifestProject.config.GetString('remote.origin.url') return _XmlRemote(name, alias, fetch, pushUrl, manifestUrl, review, revision) def _ParseDefault(self, node): """ reads a <default> element from the manifest file """ d = _Default() d.remote = self._get_remote(node) d.revisionExpr = node.getAttribute('revision') if d.revisionExpr == '': d.revisionExpr = None d.destBranchExpr = node.getAttribute('dest-branch') or None d.upstreamExpr = node.getAttribute('upstream') or None d.sync_j = XmlInt(node, 'sync-j', 1) if d.sync_j <= 0: raise ManifestParseError('%s: sync-j must be greater than 0, not "%s"' % (self.manifestFile, d.sync_j)) d.sync_c = XmlBool(node, 'sync-c', False) d.sync_s = XmlBool(node, 'sync-s', False) d.sync_tags = XmlBool(node, 'sync-tags', True) return d def _ParseNotice(self, node): """ reads a <notice> element from the manifest file The <notice> element is distinct from other tags in the XML in that the data is conveyed between the start and end tag (it's not an empty-element tag). The white space (carriage returns, indentation) for the notice element is relevant and is parsed in a way that is based on how python docstrings work. In fact, the code is remarkably similar to here: http://www.python.org/dev/peps/pep-0257/ """ # Get the data out of the node... notice = node.childNodes[0].data # Figure out minimum indentation, skipping the first line (the same line # as the <notice> tag)... minIndent = sys.maxsize lines = notice.splitlines() for line in lines[1:]: lstrippedLine = line.lstrip() if lstrippedLine: indent = len(line) - len(lstrippedLine) minIndent = min(indent, minIndent) # Strip leading / trailing blank lines and also indentation. cleanLines = [lines[0].strip()] for line in lines[1:]: cleanLines.append(line[minIndent:].rstrip()) # Clear completely blank lines from front and back... while cleanLines and not cleanLines[0]: del cleanLines[0] while cleanLines and not cleanLines[-1]: del cleanLines[-1] return '\n'.join(cleanLines) def _JoinName(self, parent_name, name): return os.path.join(parent_name, name) def _UnjoinName(self, parent_name, name): return os.path.relpath(name, parent_name) def _ParseProject(self, node, parent=None, **extra_proj_attrs): """ reads a <project> element from the manifest file """ name = self._reqatt(node, 'name') msg = self._CheckLocalPath(name, dir_ok=True) if msg: raise ManifestInvalidPathError( '<project> invalid "name": %s: %s' % (name, msg)) if parent: name = self._JoinName(parent.name, name) remote = self._get_remote(node) if remote is None: remote = self._default.remote if remote is None: raise ManifestParseError("no remote for project %s within %s" % (name, self.manifestFile)) revisionExpr = node.getAttribute('revision') or remote.revision if not revisionExpr: revisionExpr = self._default.revisionExpr if not revisionExpr: raise ManifestParseError("no revision for project %s within %s" % (name, self.manifestFile)) path = node.getAttribute('path') if not path: path = name else: # NB: The "." project is handled specially in Project.Sync_LocalHalf. msg = self._CheckLocalPath(path, dir_ok=True, cwd_dot_ok=True) if msg: raise ManifestInvalidPathError( '<project> invalid "path": %s: %s' % (path, msg)) rebase = XmlBool(node, 'rebase', True) sync_c = XmlBool(node, 'sync-c', False) sync_s = XmlBool(node, 'sync-s', self._default.sync_s) sync_tags = XmlBool(node, 'sync-tags', self._default.sync_tags) clone_depth = XmlInt(node, 'clone-depth') if clone_depth is not None and clone_depth <= 0: raise ManifestParseError('%s: clone-depth must be greater than 0, not "%s"' % (self.manifestFile, clone_depth)) dest_branch = node.getAttribute('dest-branch') or self._default.destBranchExpr upstream = node.getAttribute('upstream') or self._default.upstreamExpr groups = '' if node.hasAttribute('groups'): groups = node.getAttribute('groups') groups = self._ParseList(groups) if parent is None: relpath, worktree, gitdir, objdir, use_git_worktrees = \ self.GetProjectPaths(name, path) else: use_git_worktrees = False relpath, worktree, gitdir, objdir = \ self.GetSubprojectPaths(parent, name, path) default_groups = ['all', 'name:%s' % name, 'path:%s' % relpath] groups.extend(set(default_groups).difference(groups)) if self.IsMirror and node.hasAttribute('force-path'): if XmlBool(node, 'force-path', False): gitdir = os.path.join(self.topdir, '%s.git' % path) project = Project(manifest=self, name=name, remote=remote.ToRemoteSpec(name), gitdir=gitdir, objdir=objdir, worktree=worktree, relpath=relpath, revisionExpr=revisionExpr, revisionId=None, rebase=rebase, groups=groups, sync_c=sync_c, sync_s=sync_s, sync_tags=sync_tags, clone_depth=clone_depth, upstream=upstream, parent=parent, dest_branch=dest_branch, use_git_worktrees=use_git_worktrees, **extra_proj_attrs) for n in node.childNodes: if n.nodeName == 'copyfile': self._ParseCopyFile(project, n) if n.nodeName == 'linkfile': self._ParseLinkFile(project, n) if n.nodeName == 'annotation': self._ParseAnnotation(project, n) if n.nodeName == 'project': project.subprojects.append(self._ParseProject(n, parent=project)) return project def GetProjectPaths(self, name, path): # The manifest entries might have trailing slashes. Normalize them to avoid # unexpected filesystem behavior since we do string concatenation below. path = path.rstrip('/') name = name.rstrip('/') use_git_worktrees = False relpath = path if self.IsMirror: worktree = None gitdir = os.path.join(self.topdir, '%s.git' % name) objdir = gitdir else: worktree = os.path.join(self.topdir, path).replace('\\', '/') gitdir = os.path.join(self.repodir, 'projects', '%s.git' % path) # We allow people to mix git worktrees & non-git worktrees for now. # This allows for in situ migration of repo clients. if os.path.exists(gitdir) or not self.UseGitWorktrees: objdir = os.path.join(self.repodir, 'project-objects', '%s.git' % name) else: use_git_worktrees = True gitdir = os.path.join(self.repodir, 'worktrees', '%s.git' % name) objdir = gitdir return relpath, worktree, gitdir, objdir, use_git_worktrees def GetProjectsWithName(self, name): return self._projects.get(name, []) def GetSubprojectName(self, parent, submodule_path): return os.path.join(parent.name, submodule_path) def _JoinRelpath(self, parent_relpath, relpath): return os.path.join(parent_relpath, relpath) def _UnjoinRelpath(self, parent_relpath, relpath): return os.path.relpath(relpath, parent_relpath) def GetSubprojectPaths(self, parent, name, path): # The manifest entries might have trailing slashes. Normalize them to avoid # unexpected filesystem behavior since we do string concatenation below. path = path.rstrip('/') name = name.rstrip('/') relpath = self._JoinRelpath(parent.relpath, path) gitdir = os.path.join(parent.gitdir, 'subprojects', '%s.git' % path) objdir = os.path.join(parent.gitdir, 'subproject-objects', '%s.git' % name) if self.IsMirror: worktree = None else: worktree = os.path.join(parent.worktree, path).replace('\\', '/') return relpath, worktree, gitdir, objdir @staticmethod def _CheckLocalPath(path, dir_ok=False, cwd_dot_ok=False): """Verify |path| is reasonable for use in filesystem paths. Used with <copyfile> & <linkfile> & <project> elements. This only validates the |path| in isolation: it does not check against the current filesystem state. Thus it is suitable as a first-past in a parser. It enforces a number of constraints: * No empty paths. * No "~" in paths. * No Unicode codepoints that filesystems might elide when normalizing. * No relative path components like "." or "..". * No absolute paths. * No ".git" or ".repo*" path components. Args: path: The path name to validate. dir_ok: Whether |path| may force a directory (e.g. end in a /). cwd_dot_ok: Whether |path| may be just ".". Returns: None if |path| is OK, a failure message otherwise. """ if not path: return 'empty paths not allowed' if '~' in path: return '~ not allowed (due to 8.3 filenames on Windows filesystems)' path_codepoints = set(path) # Some filesystems (like Apple's HFS+) try to normalize Unicode codepoints # which means there are alternative names for ".git". Reject paths with # these in it as there shouldn't be any reasonable need for them here. # The set of codepoints here was cribbed from jgit's implementation: # https://eclipse.googlesource.com/jgit/jgit/+/9110037e3e9461ff4dac22fee84ef3694ed57648/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java#884 BAD_CODEPOINTS = { u'\u200C', # ZERO WIDTH NON-JOINER u'\u200D', # ZERO WIDTH JOINER u'\u200E', # LEFT-TO-RIGHT MARK u'\u200F', # RIGHT-TO-LEFT MARK u'\u202A', # LEFT-TO-RIGHT EMBEDDING u'\u202B', # RIGHT-TO-LEFT EMBEDDING u'\u202C', # POP DIRECTIONAL FORMATTING u'\u202D', # LEFT-TO-RIGHT OVERRIDE u'\u202E', # RIGHT-TO-LEFT OVERRIDE u'\u206A', # INHIBIT SYMMETRIC SWAPPING u'\u206B', # ACTIVATE SYMMETRIC SWAPPING u'\u206C', # INHIBIT ARABIC FORM SHAPING u'\u206D', # ACTIVATE ARABIC FORM SHAPING u'\u206E', # NATIONAL DIGIT SHAPES u'\u206F', # NOMINAL DIGIT SHAPES u'\uFEFF', # ZERO WIDTH NO-BREAK SPACE } if BAD_CODEPOINTS & path_codepoints: # This message is more expansive than reality, but should be fine. return 'Unicode combining characters not allowed' # Reject newlines as there shouldn't be any legitmate use for them, they'll # be confusing to users, and they can easily break tools that expect to be # able to iterate over newline delimited lists. This even applies to our # own code like .repo/project.list. if {'\r', '\n'} & path_codepoints: return 'Newlines not allowed' # Assume paths might be used on case-insensitive filesystems. path = path.lower() # Split up the path by its components. We can't use os.path.sep exclusively # as some platforms (like Windows) will convert / to \ and that bypasses all # our constructed logic here. Especially since manifest authors only use # / in their paths. resep = re.compile(r'[/%s]' % re.escape(os.path.sep)) # Strip off trailing slashes as those only produce '' elements, and we use # parts to look for individual bad components. parts = resep.split(path.rstrip('/')) # Some people use src="." to create stable links to projects. Lets allow # that but reject all other uses of "." to keep things simple. if not cwd_dot_ok or parts != ['.']: for part in set(parts): if part in {'.', '..', '.git'} or part.startswith('.repo'): return 'bad component: %s' % (part,) if not dir_ok and resep.match(path[-1]): return 'dirs not allowed' # NB: The two abspath checks here are to handle platforms with multiple # filesystem path styles (e.g. Windows). norm = os.path.normpath(path) if (norm == '..' or (len(norm) >= 3 and norm.startswith('..') and resep.match(norm[0])) or os.path.isabs(norm) or norm.startswith('/')): return 'path cannot be outside' @classmethod def _ValidateFilePaths(cls, element, src, dest): """Verify |src| & |dest| are reasonable for <copyfile> & <linkfile>. We verify the path independent of any filesystem state as we won't have a checkout available to compare to. i.e. This is for parsing validation purposes only. We'll do full/live sanity checking before we do the actual filesystem modifications in _CopyFile/_LinkFile/etc... """ # |dest| is the file we write to or symlink we create. # It is relative to the top of the repo client checkout. msg = cls._CheckLocalPath(dest) if msg: raise ManifestInvalidPathError( '<%s> invalid "dest": %s: %s' % (element, dest, msg)) # |src| is the file we read from or path we point to for symlinks. # It is relative to the top of the git project checkout. is_linkfile = element == 'linkfile' msg = cls._CheckLocalPath(src, dir_ok=is_linkfile, cwd_dot_ok=is_linkfile) if msg: raise ManifestInvalidPathError( '<%s> invalid "src": %s: %s' % (element, src, msg)) def _ParseCopyFile(self, project, node): src = self._reqatt(node, 'src') dest = self._reqatt(node, 'dest') if not self.IsMirror: # src is project relative; # dest is relative to the top of the tree. # We only validate paths if we actually plan to process them. self._ValidateFilePaths('copyfile', src, dest) project.AddCopyFile(src, dest, self.topdir) def _ParseLinkFile(self, project, node): src = self._reqatt(node, 'src') dest = self._reqatt(node, 'dest') if not self.IsMirror: # src is project relative; # dest is relative to the top of the tree. # We only validate paths if we actually plan to process them. self._ValidateFilePaths('linkfile', src, dest) project.AddLinkFile(src, dest, self.topdir) def _ParseAnnotation(self, project, node): name = self._reqatt(node, 'name') value = self._reqatt(node, 'value') try: keep = self._reqatt(node, 'keep').lower() except ManifestParseError: keep = "true" if keep != "true" and keep != "false": raise ManifestParseError('optional "keep" attribute must be ' '"true" or "false"') project.AddAnnotation(name, value, keep) def _get_remote(self, node): name = node.getAttribute('remote') if not name: return None v = self._remotes.get(name) if not v: raise ManifestParseError("remote %s not defined in %s" % (name, self.manifestFile)) return v def _reqatt(self, node, attname): """ reads a required attribute from the node. """ v = node.getAttribute(attname) if not v: raise ManifestParseError("no %s in <%s> within %s" % (attname, node.nodeName, self.manifestFile)) return v def projectsDiff(self, manifest): """return the projects differences between two manifests. The diff will be from self to given manifest. """ fromProjects = self.paths toProjects = manifest.paths fromKeys = sorted(fromProjects.keys()) toKeys = sorted(toProjects.keys()) diff = {'added': [], 'removed': [], 'changed': [], 'unreachable': []} for proj in fromKeys: if proj not in toKeys: diff['removed'].append(fromProjects[proj]) else: fromProj = fromProjects[proj] toProj = toProjects[proj] try: fromRevId = fromProj.GetCommitRevisionId() toRevId = toProj.GetCommitRevisionId() except ManifestInvalidRevisionError: diff['unreachable'].append((fromProj, toProj)) else: if fromRevId != toRevId: diff['changed'].append((fromProj, toProj)) toKeys.remove(proj) for proj in toKeys: diff['added'].append(toProjects[proj]) return diff class GitcManifest(XmlManifest): """Parser for GitC (git-in-the-cloud) manifests.""" def _ParseProject(self, node, parent=None): """Override _ParseProject and add support for GITC specific attributes.""" return super()._ParseProject( node, parent=parent, old_revision=node.getAttribute('old-revision')) def _output_manifest_project_extras(self, p, e): """Output GITC Specific Project attributes""" if p.old_revision: e.setAttribute('old-revision', str(p.old_revision)) class RepoClient(XmlManifest): """Manages a repo client checkout.""" def __init__(self, repodir, manifest_file=None): self.isGitcClient = False if os.path.exists(os.path.join(repodir, LOCAL_MANIFEST_NAME)): print('error: %s is not supported; put local manifests in `%s` instead' % (LOCAL_MANIFEST_NAME, os.path.join(repodir, LOCAL_MANIFESTS_DIR_NAME)), file=sys.stderr) sys.exit(1) if manifest_file is None: manifest_file = os.path.join(repodir, MANIFEST_FILE_NAME) local_manifests = os.path.abspath(os.path.join(repodir, LOCAL_MANIFESTS_DIR_NAME)) super().__init__(repodir, manifest_file, local_manifests) # TODO: Completely separate manifest logic out of the client. self.manifest = self class GitcClient(RepoClient, GitcManifest): """Manages a GitC client checkout.""" def __init__(self, repodir, gitc_client_name): """Initialize the GitcManifest object.""" self.gitc_client_name = gitc_client_name self.gitc_client_dir = os.path.join(gitc_utils.get_gitc_manifest_dir(), gitc_client_name) super().__init__(repodir, os.path.join(self.gitc_client_dir, '.manifest')) self.isGitcClient = True