Compare commits

..

4 Commits

Author SHA1 Message Date
c325dc35f6 sync: Fetch after applying bundle and retry after errors
After a $GIT_URL/clone.bundle has been applied to the new local
repository, perform an incremental fetch using `git fetch` to ensure
the local repository is up-to-date. This allows the hosting server
to offer stale /clone.bundle files to bootstrap a new client.

If a single git fetch fails, it may succeed again after a short
delay.  Transient failures are typical in environments where the
remote Git server happens to have limits on how many requests it
can serve at once (the anonymous git daemon, or an HTTP server).
Wait a randomized delay between 30 and 45 seconds and retry the
failed project once.  This delay gives the site time to recover
from a transient traffic spike, and the randomization makes it less
likely that a spike occurs again from all of the same clients.

Change-Id: I97fb0fcb33630fb78ac1a21d1a4a3e2268ab60c0
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-03 08:30:24 -07:00
f322b9abb4 sync: Support downloading bundle to initialize repository
An HTTP (or HTTPS) based remote server may now offer a 'clone.bundle'
file in each repository's Git directory. Over an http:// or https://
remote, repo will first ask for '$URL/clone.bundle', and if present
download this to bootstrap the local client, rather than relying
on the native Git transport to initialize the new repository.

Bundles may be hosted elsewhere. The client automatically follows an
HTTP 302 redirect to acquire the bundle file. This allows servers
to direct clients to cached copies residing on content delivery
networks, where the bundle may be closer to the end-user.

Bundle downloads are resumable from where they last left off,
allowing clients to initialize large repositories even when the
connection gets interrupted.

If a bundle does not exist for a repository (an HTTP 404 response
code is returned for '$URL/clone.bundle'), the native Git transport
is used instead. If the client is performing a shallow sync, the
bundle transport is not used, as there is no way to embed shallow
data into the bundle.

Change-Id: I05dad17792fd6fd20635a0f71589566e557cc743
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-09-28 10:07:36 -07:00
db728cd866 Allow remote url to be relative to manifest url 2011-09-28 10:07:01 -07:00
c4657969eb sync: Update default -j flag from manifest
If the manifest is updated and the default sync-j attribute
was modified, honor it during this sync session if the user
has not supplied a -j flag on the command line.

Change-Id: I127ee5c779e2bbbb40b30bddc10ec1fa704b3bf3
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-09-26 09:08:44 -07:00
8 changed files with 304 additions and 52 deletions

View File

@ -57,6 +57,15 @@ class UploadError(Exception):
def __str__(self): def __str__(self):
return self.reason return self.reason
class DownloadError(Exception):
    """Cannot download a repository.

    The human-readable cause is stored in ``reason`` and is what
    ``str()`` returns.
    """

    def __init__(self, reason):
        # Pass the reason on to Exception so that e.args, repr() and
        # pickling carry the message instead of an empty tuple.
        Exception.__init__(self, reason)
        self.reason = reason

    def __str__(self):
        return self.reason
class NoSuchProjectError(Exception): class NoSuchProjectError(Exception):
"""A specified project does not exist in the work tree. """A specified project does not exist in the work tree.
""" """

View File

@ -491,6 +491,12 @@ def close_ssh():
URI_SCP = re.compile(r'^([^@:]*@?[^:/]{1,}):') URI_SCP = re.compile(r'^([^@:]*@?[^:/]{1,}):')
URI_ALL = re.compile(r'^([a-z][a-z+]*)://([^@/]*@?[^/]*)/') URI_ALL = re.compile(r'^([a-z][a-z+]*)://([^@/]*@?[^/]*)/')
def GetSchemeFromUrl(url):
    """Return the scheme part of url, or None if url has no scheme.

    The scheme is the first group captured by URI_ALL, i.e. the text in
    front of '://'.
    """
    matched = URI_ALL.match(url)
    if matched is None:
        return None
    return matched.group(1)
def _preconnect(url): def _preconnect(url):
m = URI_ALL.match(url) m = URI_ALL.match(url)
if m: if m:

View File

@ -37,6 +37,7 @@ from command import InteractiveCommand
from command import MirrorSafeCommand from command import MirrorSafeCommand
from command import PagedCommand from command import PagedCommand
from editor import Editor from editor import Editor
from error import DownloadError
from error import ManifestInvalidRevisionError from error import ManifestInvalidRevisionError
from error import NoSuchProjectError from error import NoSuchProjectError
from error import RepoChangedException from error import RepoChangedException
@ -143,6 +144,9 @@ class _Repo(object):
else: else:
print >>sys.stderr, 'real\t%dh%dm%.3fs' \ print >>sys.stderr, 'real\t%dh%dm%.3fs' \
% (hours, minutes, seconds) % (hours, minutes, seconds)
except DownloadError, e:
print >>sys.stderr, 'error: %s' % str(e)
sys.exit(1)
except ManifestInvalidRevisionError, e: except ManifestInvalidRevisionError, e:
print >>sys.stderr, 'error: %s' % str(e) print >>sys.stderr, 'error: %s' % str(e)
sys.exit(1) sys.exit(1)

View File

@ -14,7 +14,9 @@
# limitations under the License. # limitations under the License.
import os import os
import re
import sys import sys
import urlparse
import xml.dom.minidom import xml.dom.minidom
from git_config import GitConfig, IsId from git_config import GitConfig, IsId
@ -24,6 +26,9 @@ from error import ManifestParseError
MANIFEST_FILE_NAME = 'manifest.xml' MANIFEST_FILE_NAME = 'manifest.xml'
LOCAL_MANIFEST_NAME = 'local_manifest.xml' LOCAL_MANIFEST_NAME = 'local_manifest.xml'
urlparse.uses_relative.extend(['ssh', 'git'])
urlparse.uses_netloc.extend(['ssh', 'git'])
class _Default(object): class _Default(object):
"""Project defaults within the manifest.""" """Project defaults within the manifest."""
@ -35,16 +40,22 @@ class _XmlRemote(object):
def __init__(self, def __init__(self,
name, name,
fetch=None, fetch=None,
manifestUrl=None,
review=None): review=None):
self.name = name self.name = name
self.fetchUrl = fetch self.fetchUrl = fetch
self.manifestUrl = manifestUrl
self.reviewUrl = review self.reviewUrl = review
def ToRemoteSpec(self, projectName): def ToRemoteSpec(self, projectName):
url = self.fetchUrl url = self.fetchUrl.rstrip('/') + '/' + projectName + '.git'
while url.endswith('/'): manifestUrl = self.manifestUrl.rstrip('/')
url = url[:-1] # urljoin will get confused if there is no scheme in the base url
url += '/%s.git' % projectName # ie, if manifestUrl is of the form <hostname:port>
if manifestUrl.find(':') != manifestUrl.find('/') - 1:
manifestUrl = 'gopher://' + manifestUrl
url = urlparse.urljoin(manifestUrl, url)
url = re.sub(r'^gopher://', '', url)
return RemoteSpec(self.name, url, self.reviewUrl) return RemoteSpec(self.name, url, self.reviewUrl)
class XmlManifest(object): class XmlManifest(object):
@ -366,7 +377,8 @@ class XmlManifest(object):
if name is None: if name is None:
s = m_url.rindex('/') + 1 s = m_url.rindex('/') + 1
remote = _XmlRemote('origin', m_url[:s]) manifestUrl = self.manifestProject.config.GetString('remote.origin.url')
remote = _XmlRemote('origin', m_url[:s], manifestUrl)
name = m_url[s:] name = m_url[s:]
if name.endswith('.git'): if name.endswith('.git'):
@ -394,7 +406,8 @@ class XmlManifest(object):
review = node.getAttribute('review') review = node.getAttribute('review')
if review == '': if review == '':
review = None review = None
return _XmlRemote(name, fetch, review) manifestUrl = self.manifestProject.config.GetString('remote.origin.url')
return _XmlRemote(name, fetch, manifestUrl, review)
def _ParseDefault(self, node): def _ParseDefault(self, node):
""" """

View File

@ -16,17 +16,21 @@ import traceback
import errno import errno
import filecmp import filecmp
import os import os
import random
import re import re
import shutil import shutil
import stat import stat
import sys import sys
import time
import urllib2 import urllib2
from color import Coloring from color import Coloring
from git_command import GitCommand from git_command import GitCommand
from git_config import GitConfig, IsId from git_config import GitConfig, IsId, GetSchemeFromUrl
from error import DownloadError
from error import GitError, HookError, ImportError, UploadError from error import GitError, HookError, ImportError, UploadError
from error import ManifestInvalidRevisionError from error import ManifestInvalidRevisionError
from progress import Progress
from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
@ -884,19 +888,33 @@ class Project(object):
## Sync ## ## Sync ##
def Sync_NetworkHalf(self, quiet=False): def Sync_NetworkHalf(self, quiet=False, is_new=None):
"""Perform only the network IO portion of the sync process. """Perform only the network IO portion of the sync process.
Local working directory/branch state is not affected. Local working directory/branch state is not affected.
""" """
is_new = not self.Exists if is_new is None:
is_new = not self.Exists
if is_new: if is_new:
if not quiet:
print >>sys.stderr
print >>sys.stderr, 'Initializing project %s ...' % self.name
self._InitGitDir() self._InitGitDir()
self._InitRemote() self._InitRemote()
if not self._RemoteFetch(initial=is_new, quiet=quiet):
if is_new:
alt = os.path.join(self.gitdir, 'objects/info/alternates')
try:
fd = open(alt, 'rb')
try:
alt_dir = fd.readline().rstrip()
finally:
fd.close()
except IOError:
alt_dir = None
else:
alt_dir = None
if alt_dir is None and self._ApplyCloneBundle(initial=is_new, quiet=quiet):
is_new = False
if not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir):
return False return False
#Check that the requested ref was found after fetch #Check that the requested ref was found after fetch
@ -1307,29 +1325,19 @@ class Project(object):
def _RemoteFetch(self, name=None, tag=None, def _RemoteFetch(self, name=None, tag=None,
initial=False, initial=False,
quiet=False): quiet=False,
alt_dir=None):
if not name: if not name:
name = self.remote.name name = self.remote.name
ssh_proxy = False ssh_proxy = False
if self.GetRemote(name).PreConnectFetch(): remote = self.GetRemote(name)
if remote.PreConnectFetch():
ssh_proxy = True ssh_proxy = True
if initial: if initial:
alt = os.path.join(self.gitdir, 'objects/info/alternates') if alt_dir and 'objects' == os.path.basename(alt_dir):
try: ref_dir = os.path.dirname(alt_dir)
fd = open(alt, 'rb')
try:
ref_dir = fd.readline()
if ref_dir and ref_dir.endswith('\n'):
ref_dir = ref_dir[:-1]
finally:
fd.close()
except IOError, e:
ref_dir = None
if ref_dir and 'objects' == os.path.basename(ref_dir):
ref_dir = os.path.dirname(ref_dir)
packed_refs = os.path.join(self.gitdir, 'packed-refs') packed_refs = os.path.join(self.gitdir, 'packed-refs')
remote = self.GetRemote(name) remote = self.GetRemote(name)
@ -1365,9 +1373,8 @@ class Project(object):
old_packed += line old_packed += line
_lwrite(packed_refs, tmp_packed) _lwrite(packed_refs, tmp_packed)
else: else:
ref_dir = None alt_dir = None
cmd = ['fetch'] cmd = ['fetch']
@ -1386,21 +1393,149 @@ class Project(object):
cmd.append('tag') cmd.append('tag')
cmd.append(tag) cmd.append(tag)
ok = GitCommand(self, ok = False
cmd, for i in range(2):
bare = True, if GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy).Wait() == 0:
ssh_proxy = ssh_proxy).Wait() == 0 ok = True
break
time.sleep(random.randint(30, 45))
if initial: if initial:
if ref_dir: if alt_dir:
if old_packed != '': if old_packed != '':
_lwrite(packed_refs, old_packed) _lwrite(packed_refs, old_packed)
else: else:
os.remove(packed_refs) os.remove(packed_refs)
self.bare_git.pack_refs('--all', '--prune') self.bare_git.pack_refs('--all', '--prune')
return ok return ok
def _ApplyCloneBundle(self, initial=False, quiet=False):
    """Try to populate this project from the remote's 'clone.bundle' file.

    The bundle is only considered when the (insteadOf-rewritten) URL uses
    http or https.  On an initial sync it is skipped if 'repo.depth' is
    set in the manifest project's config; on a later sync it is only used
    when a clone.bundle or clone.bundle.tmp file is already present in
    the git directory.

    Returns True if the bundle was obtained and `git fetch` from it exited
    with status 0, otherwise False.  Exceptions raised by _FetchBundle are
    not caught here.
    """
    if initial and self.manifest.manifestProject.config.GetString('repo.depth'):
        return False

    remote = self.GetRemote(self.remote.name)
    # Drop trailing slashes first so that a remote URL written as
    # '.../name/' does not turn into '.../name//clone.bundle'.
    bundle_url = remote.url.rstrip('/') + '/clone.bundle'
    bundle_url = GitConfig.ForUser().UrlInsteadOf(bundle_url)
    if GetSchemeFromUrl(bundle_url) not in ('http', 'https'):
        return False

    bundle_dst = os.path.join(self.gitdir, 'clone.bundle')
    bundle_tmp = os.path.join(self.gitdir, 'clone.bundle.tmp')

    exist_dst = os.path.exists(bundle_dst)
    exist_tmp = os.path.exists(bundle_tmp)

    # For an existing project there is nothing to do unless an earlier
    # run left a complete or partial bundle behind.
    if not initial and not exist_dst and not exist_tmp:
        return False

    if not exist_dst:
        exist_dst = self._FetchBundle(bundle_url, bundle_tmp, bundle_dst, quiet)
    if not exist_dst:
        return False

    cmd = ['fetch']
    if quiet:
        cmd.append('--quiet')
    if not self.worktree:
        cmd.append('--update-head-ok')
    cmd.append(bundle_dst)
    for f in remote.fetch:
        cmd.append(str(f))
    cmd.append('refs/tags/*:refs/tags/*')

    ok = GitCommand(self, cmd, bare=True).Wait() == 0

    # The bundle files are removed whether or not the fetch succeeded.
    if os.path.exists(bundle_dst):
        os.remove(bundle_dst)
    if os.path.exists(bundle_tmp):
        os.remove(bundle_tmp)
    return ok
def _FetchBundle(self, srcUrl, tmpPath, dstPath, quiet):
keep = True
done = False
dest = open(tmpPath, 'a+b')
try:
dest.seek(0, os.SEEK_END)
pos = dest.tell()
req = urllib2.Request(srcUrl)
if pos > 0:
req.add_header('Range', 'bytes=%d-' % pos)
try:
r = urllib2.urlopen(req)
except urllib2.HTTPError, e:
if e.code == 404:
keep = False
return False
elif e.info()['content-type'] == 'text/plain':
try:
msg = e.read()
if len(msg) > 0 and msg[-1] == '\n':
msg = msg[0:-1]
msg = ' (%s)' % msg
except:
msg = ''
else:
try:
from BaseHTTPServer import BaseHTTPRequestHandler
res = BaseHTTPRequestHandler.responses[e.code]
msg = ' (%s: %s)' % (res[0], res[1])
except:
msg = ''
raise DownloadError('HTTP %s%s' % (e.code, msg))
except urllib2.URLError, e:
raise DownloadError('%s (%s)' % (e.reason, req.get_host()))
p = None
try:
size = r.headers['content-length']
unit = 1 << 10
if size and not quiet:
if size > 1024 * 1.3:
unit = 1 << 20
desc = 'MB'
else:
desc = 'KB'
p = Progress(
'Downloading %s' % self.relpath,
int(size) / unit,
units=desc)
if pos > 0:
p.update(pos / unit)
s = 0
while True:
d = r.read(8192)
if d == '':
done = True
return True
dest.write(d)
if p:
s += len(d)
if s >= unit:
p.update(s / unit)
s = s % unit
if p:
if s >= unit:
p.update(s / unit)
else:
p.update(1)
finally:
r.close()
if p:
p.end()
finally:
dest.close()
if os.path.exists(dstPath):
os.remove(dstPath)
if done:
os.rename(tmpPath, dstPath)
elif not keep:
os.remove(tmpPath)
def _Checkout(self, rev, quiet=False): def _Checkout(self, rev, quiet=False):
cmd = ['checkout'] cmd = ['checkout']
if quiet: if quiet:

103
repo
View File

@ -28,7 +28,7 @@ if __name__ == '__main__':
del magic del magic
# increment this whenever we make important changes to this script # increment this whenever we make important changes to this script
VERSION = (1, 12) VERSION = (1, 13)
# increment this if the MAINTAINER_KEYS block is modified # increment this if the MAINTAINER_KEYS block is modified
KEYRING_VERSION = (1,0) KEYRING_VERSION = (1,0)
@ -91,6 +91,7 @@ import re
import readline import readline
import subprocess import subprocess
import sys import sys
import urllib2
home_dot_repo = os.path.expanduser('~/.repoconfig') home_dot_repo = os.path.expanduser('~/.repoconfig')
gpg_dir = os.path.join(home_dot_repo, 'gnupg') gpg_dir = os.path.join(home_dot_repo, 'gnupg')
@ -187,10 +188,6 @@ def _Init(args):
else: else:
can_verify = True can_verify = True
if not opt.quiet:
print >>sys.stderr, 'Getting repo ...'
print >>sys.stderr, ' from %s' % url
dst = os.path.abspath(os.path.join(repodir, S_repo)) dst = os.path.abspath(os.path.join(repodir, S_repo))
_Clone(url, dst, opt.quiet) _Clone(url, dst, opt.quiet)
@ -300,15 +297,42 @@ def _SetConfig(local, name, value):
raise CloneFailure() raise CloneFailure()
def _Fetch(local, quiet, *args): def _InitHttp():
handlers = []
mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
try:
import netrc
n = netrc.netrc()
for host in n.hosts:
p = n.hosts[host]
mgr.add_password(None, 'http://%s/' % host, p[0], p[2])
mgr.add_password(None, 'https://%s/' % host, p[0], p[2])
except:
pass
handlers.append(urllib2.HTTPBasicAuthHandler(mgr))
if 'http_proxy' in os.environ:
url = os.environ['http_proxy']
handlers.append(urllib2.ProxyHandler({'http': url, 'https': url}))
if 'REPO_CURL_VERBOSE' in os.environ:
handlers.append(urllib2.HTTPHandler(debuglevel=1))
handlers.append(urllib2.HTTPSHandler(debuglevel=1))
urllib2.install_opener(urllib2.build_opener(*handlers))
def _Fetch(url, local, src, quiet):
if not quiet:
print >>sys.stderr, 'Get %s' % url
cmd = [GIT, 'fetch'] cmd = [GIT, 'fetch']
if quiet: if quiet:
cmd.append('--quiet') cmd.append('--quiet')
err = subprocess.PIPE err = subprocess.PIPE
else: else:
err = None err = None
cmd.extend(args) cmd.append(src)
cmd.append('origin') cmd.append('+refs/heads/*:refs/remotes/origin/*')
cmd.append('refs/tags/*:refs/tags/*')
proc = subprocess.Popen(cmd, cwd = local, stderr = err) proc = subprocess.Popen(cmd, cwd = local, stderr = err)
if err: if err:
@ -317,6 +341,62 @@ def _Fetch(local, quiet, *args):
if proc.wait() != 0: if proc.wait() != 0:
raise CloneFailure() raise CloneFailure()
def _DownloadBundle(url, local, quiet):
if not url.endswith('/'):
url += '/'
url += 'clone.bundle'
proc = subprocess.Popen(
[GIT, 'config', '--get-regexp', 'url.*.insteadof'],
cwd = local,
stdout = subprocess.PIPE)
for line in proc.stdout:
m = re.compile(r'^url\.(.*)\.insteadof (.*)$').match(line)
if m:
new_url = m.group(1)
old_url = m.group(2)
if url.startswith(old_url):
url = new_url + url[len(old_url):]
break
proc.stdout.close()
proc.wait()
if not url.startswith('http:') and not url.startswith('https:'):
return False
dest = open(os.path.join(local, '.git', 'clone.bundle'), 'w+b')
try:
try:
r = urllib2.urlopen(url)
except urllib2.HTTPError, e:
if e.code == 404:
return False
print >>sys.stderr, 'fatal: Cannot get %s' % url
print >>sys.stderr, 'fatal: HTTP error %s' % e.code
raise CloneFailure()
except urllib2.URLError, e:
print >>sys.stderr, 'fatal: Cannot get %s' % url
print >>sys.stderr, 'fatal: error %s' % e.reason
raise CloneFailure()
try:
if not quiet:
print >>sys.stderr, 'Get %s' % url
while True:
buf = r.read(8192)
if buf == '':
return True
dest.write(buf)
finally:
r.close()
finally:
dest.close()
def _ImportBundle(local):
    """Fetch from '<local>/.git/clone.bundle', then delete that file.

    The bundle is removed even if _Fetch raises.
    """
    bundle = os.path.join(local, '.git', 'clone.bundle')
    try:
        # quiet is always passed as True here.
        _Fetch(local, local, bundle, True)
    finally:
        os.remove(bundle)
def _Clone(url, local, quiet): def _Clone(url, local, quiet):
"""Clones a git repository to a new subdirectory of repodir """Clones a git repository to a new subdirectory of repodir
@ -344,11 +424,14 @@ def _Clone(url, local, quiet):
print >>sys.stderr, 'fatal: could not create %s' % local print >>sys.stderr, 'fatal: could not create %s' % local
raise CloneFailure() raise CloneFailure()
_InitHttp()
_SetConfig(local, 'remote.origin.url', url) _SetConfig(local, 'remote.origin.url', url)
_SetConfig(local, 'remote.origin.fetch', _SetConfig(local, 'remote.origin.fetch',
'+refs/heads/*:refs/remotes/origin/*') '+refs/heads/*:refs/remotes/origin/*')
_Fetch(local, quiet) if _DownloadBundle(url, local, quiet):
_Fetch(local, quiet, '--tags') _ImportBundle(local)
else:
_Fetch(url, local, 'origin', quiet)
def _Verify(cwd, branch, quiet): def _Verify(cwd, branch, quiet):

View File

@ -21,6 +21,7 @@ from color import Coloring
from command import InteractiveCommand, MirrorSafeCommand from command import InteractiveCommand, MirrorSafeCommand
from error import ManifestParseError from error import ManifestParseError
from project import SyncBuffer from project import SyncBuffer
from git_config import GitConfig
from git_command import git_require, MIN_GIT_VERSION from git_command import git_require, MIN_GIT_VERSION
class Init(InteractiveCommand, MirrorSafeCommand): class Init(InteractiveCommand, MirrorSafeCommand):
@ -108,8 +109,8 @@ to update the working directory files.
sys.exit(1) sys.exit(1)
if not opt.quiet: if not opt.quiet:
print >>sys.stderr, 'Getting manifest ...' print >>sys.stderr, 'Get %s' \
print >>sys.stderr, ' from %s' % opt.manifest_url % GitConfig.ForUser().UrlInsteadOf(opt.manifest_url)
m._InitGitDir() m._InitGitDir()
if opt.manifest_branch: if opt.manifest_branch:
@ -138,7 +139,7 @@ to update the working directory files.
print >>sys.stderr, 'fatal: --mirror not supported on existing client' print >>sys.stderr, 'fatal: --mirror not supported on existing client'
sys.exit(1) sys.exit(1)
if not m.Sync_NetworkHalf(): if not m.Sync_NetworkHalf(is_new=is_new):
r = m.GetRemote(m.remote.name) r = m.GetRemote(m.remote.name)
print >>sys.stderr, 'fatal: cannot obtain manifest %s' % r.url print >>sys.stderr, 'fatal: cannot obtain manifest %s' % r.url

View File

@ -136,7 +136,6 @@ later is required to fix a server side protocol bug.
help='be more quiet') help='be more quiet')
p.add_option('-j','--jobs', p.add_option('-j','--jobs',
dest='jobs', action='store', type='int', dest='jobs', action='store', type='int',
default=self.jobs,
help="projects to fetch simultaneously (default %d)" % self.jobs) help="projects to fetch simultaneously (default %d)" % self.jobs)
if show_smart: if show_smart:
p.add_option('-s', '--smart-sync', p.add_option('-s', '--smart-sync',
@ -401,6 +400,8 @@ uncommitted changes are present' % project.relpath
if not syncbuf.Finish(): if not syncbuf.Finish():
sys.exit(1) sys.exit(1)
self.manifest._Unload() self.manifest._Unload()
if opt.jobs is None:
self.jobs = self.manifest.default.sync_j
all = self.GetProjects(args, missing_ok=True) all = self.GetProjects(args, missing_ok=True)
if not opt.local_only: if not opt.local_only: