sync: Support downloading bundle to initialize repository

An HTTP (or HTTPS) based remote server may now offer a 'clone.bundle'
file in each repository's Git directory. Over an http:// or https://
remote, repo will first ask for '$URL/clone.bundle' and, if present,
download it to bootstrap the local client, rather than relying
on the native Git transport to initialize the new repository.

Bundles may be hosted elsewhere. The client automatically follows an
HTTP 302 redirect to acquire the bundle file. This allows servers
to direct clients to cached copies residing on content delivery
networks, where the bundle may be closer to the end-user.

Bundle downloads are resumable from where they last left off,
allowing clients to initialize large repositories even when the
connection gets interrupted.

If a bundle does not exist for a repository (an HTTP 404 response
code is returned for '$URL/clone.bundle'), the native Git transport
is used instead. If the client is performing a shallow sync, the
bundle transport is not used, as there is no way to embed shallow
data into the bundle.

Change-Id: I05dad17792fd6fd20635a0f71589566e557cc743
Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
Shawn O. Pearce 2011-09-19 14:50:58 -07:00
parent db728cd866
commit f322b9abb4
6 changed files with 250 additions and 24 deletions

View File

@ -57,6 +57,15 @@ class UploadError(Exception):
def __str__(self):
return self.reason
class DownloadError(Exception):
  """Raised when a repository cannot be downloaded.

  The explanation passed to the constructor is stored on the `reason`
  attribute, and str() of the exception returns it.
  """

  def __init__(self, reason):
    self.reason = reason

  def __str__(self):
    return self.reason
class NoSuchProjectError(Exception):
"""A specified project does not exist in the work tree.
"""

View File

@ -491,6 +491,12 @@ def close_ssh():
URI_SCP = re.compile(r'^([^@:]*@?[^:/]{1,}):')
URI_ALL = re.compile(r'^([a-z][a-z+]*)://([^@/]*@?[^/]*)/')
def GetSchemeFromUrl(url):
  """Return the scheme part of url as captured by URI_ALL.

  Returns None when URI_ALL does not match url.
  """
  match = URI_ALL.match(url)
  if match is None:
    return None
  return match.group(1)
def _preconnect(url):
m = URI_ALL.match(url)
if m:

View File

@ -37,6 +37,7 @@ from command import InteractiveCommand
from command import MirrorSafeCommand
from command import PagedCommand
from editor import Editor
from error import DownloadError
from error import ManifestInvalidRevisionError
from error import NoSuchProjectError
from error import RepoChangedException
@ -143,6 +144,9 @@ class _Repo(object):
else:
print >>sys.stderr, 'real\t%dh%dm%.3fs' \
% (hours, minutes, seconds)
except DownloadError, e:
print >>sys.stderr, 'error: %s' % str(e)
sys.exit(1)
except ManifestInvalidRevisionError, e:
print >>sys.stderr, 'error: %s' % str(e)
sys.exit(1)

View File

@ -24,9 +24,11 @@ import urllib2
from color import Coloring
from git_command import GitCommand
from git_config import GitConfig, IsId
from git_config import GitConfig, IsId, GetSchemeFromUrl
from error import DownloadError
from error import GitError, HookError, ImportError, UploadError
from error import ManifestInvalidRevisionError
from progress import Progress
from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
@ -884,15 +886,13 @@ class Project(object):
## Sync ##
def Sync_NetworkHalf(self, quiet=False):
def Sync_NetworkHalf(self, quiet=False, is_new=None):
"""Perform only the network IO portion of the sync process.
Local working directory/branch state is not affected.
"""
is_new = not self.Exists
if is_new is None:
is_new = not self.Exists
if is_new:
if not quiet:
print >>sys.stderr
print >>sys.stderr, 'Initializing project %s ...' % self.name
self._InitGitDir()
self._InitRemote()
@ -1312,9 +1312,16 @@ class Project(object):
name = self.remote.name
ssh_proxy = False
if self.GetRemote(name).PreConnectFetch():
remote = self.GetRemote(name)
if remote.PreConnectFetch():
ssh_proxy = True
bundle_dst = os.path.join(self.gitdir, 'clone.bundle')
bundle_tmp = os.path.join(self.gitdir, 'clone.bundle.tmp')
use_bundle = False
if os.path.exists(bundle_dst) or os.path.exists(bundle_tmp):
use_bundle = True
if initial:
alt = os.path.join(self.gitdir, 'objects/info/alternates')
try:
@ -1329,6 +1336,8 @@ class Project(object):
ref_dir = None
if ref_dir and 'objects' == os.path.basename(ref_dir):
if use_bundle:
use_bundle = False
ref_dir = os.path.dirname(ref_dir)
packed_refs = os.path.join(self.gitdir, 'packed-refs')
remote = self.GetRemote(name)
@ -1368,6 +1377,7 @@ class Project(object):
else:
ref_dir = None
use_bundle = True
cmd = ['fetch']
@ -1376,15 +1386,37 @@ class Project(object):
depth = self.manifest.manifestProject.config.GetString('repo.depth')
if depth and initial:
cmd.append('--depth=%s' % depth)
use_bundle = False
if quiet:
cmd.append('--quiet')
if not self.worktree:
cmd.append('--update-head-ok')
cmd.append(name)
if tag is not None:
cmd.append('tag')
cmd.append(tag)
if use_bundle and not os.path.exists(bundle_dst):
bundle_url = remote.url + '/clone.bundle'
bundle_url = GitConfig.ForUser().UrlInsteadOf(bundle_url)
if GetSchemeFromUrl(bundle_url) in ('http', 'https'):
use_bundle = self._FetchBundle(
bundle_url,
bundle_tmp,
bundle_dst,
quiet=quiet)
else:
use_bundle = False
if use_bundle:
if not quiet:
cmd.append('--quiet')
cmd.append(bundle_dst)
for f in remote.fetch:
cmd.append(str(f))
cmd.append('refs/tags/*:refs/tags/*')
else:
cmd.append(name)
if tag is not None:
cmd.append('tag')
cmd.append(tag)
ok = GitCommand(self,
cmd,
@ -1399,8 +1431,99 @@ class Project(object):
os.remove(packed_refs)
self.bare_git.pack_refs('--all', '--prune')
if os.path.exists(bundle_dst):
os.remove(bundle_dst)
if os.path.exists(bundle_tmp):
os.remove(bundle_tmp)
return ok
def _FetchBundle(self, srcUrl, tmpPath, dstPath, quiet=False):
keep = True
done = False
dest = open(tmpPath, 'a+b')
try:
dest.seek(0, os.SEEK_END)
pos = dest.tell()
req = urllib2.Request(srcUrl)
if pos > 0:
req.add_header('Range', 'bytes=%d-' % pos)
try:
r = urllib2.urlopen(req)
except urllib2.HTTPError, e:
if e.code == 404:
keep = False
return False
elif e.info()['content-type'] == 'text/plain':
try:
msg = e.read()
if len(msg) > 0 and msg[-1] == '\n':
msg = msg[0:-1]
msg = ' (%s)' % msg
except:
msg = ''
else:
try:
from BaseHTTPServer import BaseHTTPRequestHandler
res = BaseHTTPRequestHandler.responses[e.code]
msg = ' (%s: %s)' % (res[0], res[1])
except:
msg = ''
raise DownloadError('HTTP %s%s' % (e.code, msg))
except urllib2.URLError, e:
raise DownloadError('%s (%s)' % (e.reason, req.get_host()))
p = None
try:
size = r.headers['content-length']
unit = 1 << 10
if size and not quiet:
if size > 1024 * 1.3:
unit = 1 << 20
desc = 'MB'
else:
desc = 'KB'
p = Progress(
'Downloading %s' % self.relpath,
int(size) / unit,
units=desc)
if pos > 0:
p.update(pos / unit)
s = 0
while True:
d = r.read(8192)
if d == '':
done = True
return True
dest.write(d)
if p:
s += len(d)
if s >= unit:
p.update(s / unit)
s = s % unit
if p:
if s >= unit:
p.update(s / unit)
else:
p.update(1)
finally:
r.close()
if p:
p.end()
finally:
dest.close()
if os.path.exists(dstPath):
os.remove(dstPath)
if done:
os.rename(tmpPath, dstPath)
elif not keep:
os.remove(tmpPath)
def _Checkout(self, rev, quiet=False):
cmd = ['checkout']
if quiet:

103
repo
View File

@ -28,7 +28,7 @@ if __name__ == '__main__':
del magic
# increment this whenever we make important changes to this script
VERSION = (1, 12)
VERSION = (1, 13)
# increment this if the MAINTAINER_KEYS block is modified
KEYRING_VERSION = (1,0)
@ -91,6 +91,7 @@ import re
import readline
import subprocess
import sys
import urllib2
home_dot_repo = os.path.expanduser('~/.repoconfig')
gpg_dir = os.path.join(home_dot_repo, 'gnupg')
@ -187,10 +188,6 @@ def _Init(args):
else:
can_verify = True
if not opt.quiet:
print >>sys.stderr, 'Getting repo ...'
print >>sys.stderr, ' from %s' % url
dst = os.path.abspath(os.path.join(repodir, S_repo))
_Clone(url, dst, opt.quiet)
@ -300,15 +297,42 @@ def _SetConfig(local, name, value):
raise CloneFailure()
def _Fetch(local, quiet, *args):
def _InitHttp():
  """Install a process-wide urllib2 opener used for HTTP(S) downloads.

  The opener is built from:
    - basic-auth credentials for every host in the user's netrc file
      (best effort: skipped when the file cannot be loaded),
    - the proxy named by $http_proxy, applied to both http and https,
    - debug-enabled HTTP/HTTPS handlers when $REPO_CURL_VERBOSE is set.
  """
  handlers = []

  mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
  try:
    import netrc
    n = netrc.netrc()
    for host, auth in n.hosts.items():
      # auth is the netrc (login, account, password) tuple.
      mgr.add_password(None, 'http://%s/' % host, auth[0], auth[2])
      mgr.add_password(None, 'https://%s/' % host, auth[0], auth[2])
  except Exception:
    # netrc credentials are optional, so a missing or malformed file is
    # ignored. Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    pass
  handlers.append(urllib2.HTTPBasicAuthHandler(mgr))

  if 'http_proxy' in os.environ:
    url = os.environ['http_proxy']
    handlers.append(urllib2.ProxyHandler({'http': url, 'https': url}))
  if 'REPO_CURL_VERBOSE' in os.environ:
    handlers.append(urllib2.HTTPHandler(debuglevel=1))
    handlers.append(urllib2.HTTPSHandler(debuglevel=1))
  urllib2.install_opener(urllib2.build_opener(*handlers))
def _Fetch(url, local, src, quiet):
if not quiet:
print >>sys.stderr, 'Get %s' % url
cmd = [GIT, 'fetch']
if quiet:
cmd.append('--quiet')
err = subprocess.PIPE
else:
err = None
cmd.extend(args)
cmd.append('origin')
cmd.append(src)
cmd.append('+refs/heads/*:refs/remotes/origin/*')
cmd.append('refs/tags/*:refs/tags/*')
proc = subprocess.Popen(cmd, cwd = local, stderr = err)
if err:
@ -317,6 +341,62 @@ def _Fetch(local, quiet, *args):
if proc.wait() != 0:
raise CloneFailure()
def _DownloadBundle(url, local, quiet):
if not url.endswith('/'):
url += '/'
url += 'clone.bundle'
proc = subprocess.Popen(
[GIT, 'config', '--get-regexp', 'url.*.insteadof'],
cwd = local,
stdout = subprocess.PIPE)
for line in proc.stdout:
m = re.compile(r'^url\.(.*)\.insteadof (.*)$').match(line)
if m:
new_url = m.group(1)
old_url = m.group(2)
if url.startswith(old_url):
url = new_url + url[len(old_url):]
break
proc.stdout.close()
proc.wait()
if not url.startswith('http:') and not url.startswith('https:'):
return False
dest = open(os.path.join(local, '.git', 'clone.bundle'), 'w+b')
try:
try:
r = urllib2.urlopen(url)
except urllib2.HTTPError, e:
if e.code == 404:
return False
print >>sys.stderr, 'fatal: Cannot get %s' % url
print >>sys.stderr, 'fatal: HTTP error %s' % e.code
raise CloneFailure()
except urllib2.URLError, e:
print >>sys.stderr, 'fatal: Cannot get %s' % url
print >>sys.stderr, 'fatal: error %s' % e.reason
raise CloneFailure()
try:
if not quiet:
print >>sys.stderr, 'Get %s' % url
while True:
buf = r.read(8192)
if buf == '':
return True
dest.write(buf)
finally:
r.close()
finally:
dest.close()
def _ImportBundle(local):
  """Fetch from <local>/.git/clone.bundle, then delete the bundle file.

  The bundle is removed whether or not the fetch succeeds.
  """
  bundle = os.path.join(local, '.git', 'clone.bundle')
  try:
    _Fetch(local, local, bundle, True)
  finally:
    os.remove(bundle)
def _Clone(url, local, quiet):
"""Clones a git repository to a new subdirectory of repodir
@ -344,11 +424,14 @@ def _Clone(url, local, quiet):
print >>sys.stderr, 'fatal: could not create %s' % local
raise CloneFailure()
_InitHttp()
_SetConfig(local, 'remote.origin.url', url)
_SetConfig(local, 'remote.origin.fetch',
'+refs/heads/*:refs/remotes/origin/*')
_Fetch(local, quiet)
_Fetch(local, quiet, '--tags')
if _DownloadBundle(url, local, quiet):
_ImportBundle(local)
else:
_Fetch(url, local, 'origin', quiet)
def _Verify(cwd, branch, quiet):

View File

@ -21,6 +21,7 @@ from color import Coloring
from command import InteractiveCommand, MirrorSafeCommand
from error import ManifestParseError
from project import SyncBuffer
from git_config import GitConfig
from git_command import git_require, MIN_GIT_VERSION
class Init(InteractiveCommand, MirrorSafeCommand):
@ -108,8 +109,8 @@ to update the working directory files.
sys.exit(1)
if not opt.quiet:
print >>sys.stderr, 'Getting manifest ...'
print >>sys.stderr, ' from %s' % opt.manifest_url
print >>sys.stderr, 'Get %s' \
% GitConfig.ForUser().UrlInsteadOf(opt.manifest_url)
m._InitGitDir()
if opt.manifest_branch:
@ -138,7 +139,7 @@ to update the working directory files.
print >>sys.stderr, 'fatal: --mirror not supported on existing client'
sys.exit(1)
if not m.Sync_NetworkHalf():
if not m.Sync_NetworkHalf(is_new=is_new):
r = m.GetRemote(m.remote.name)
print >>sys.stderr, 'fatal: cannot obtain manifest %s' % r.url