Compare commits

...

10 Commits

Author SHA1 Message Date
9d8f914fe8 Remove extra '/' in RemoteSpec
urljoin appends a '/' if only the domain is in the url path.  This
change strips that off before creating a RemoteSpec.
2011-11-03 13:05:14 -07:00
ceea368e88 Correctly name projects when mirroring
A bug introduced by relative urls caused projects such as manifest.git
to be placed in the root directory instead of the directory they should
be in.

This fix creates and refers to a resolvedFetchUrl in the _XmlRemote
class in order to get a fetchUrl that is never relative.
2011-10-20 11:01:38 -07:00
b660539c4a Fix sync on Python 2.6.6
Python 2.6.6 has the same bug as Python 2.7, where HTTP
authentication just stops working, but does not have the
setter method to clear the retry counter. Work around by
setting the field directly if it exists.

Change-Id: I6a742e606bb7750dc66c33fc7c5d1310541db2c8
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 15:58:07 -07:00
752371d91b help: Fix help sync
help sync crashed as sync required the manifest to be configured to
create the option parser, as the default number of jobs is required.

Change-Id: Ie75e8d75ac0e38313e4aab451cbb24430e84def5
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 15:23:41 -07:00
1a68dc58eb upload: Honor REPO_HOST_PORT_INFO environment variable
REPO_HOST_PORT_INFO can be set to 'host:port' and be used
instead of the review URL given in the manifest.

Change-Id: I440bdecb2c2249fe5285ec5d0c28a937b4053450
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 14:12:46 -07:00
df5ee52050 Fix Python 2.4 support
Change-Id: I89521ae52fa564f0d849cc51e71fee65b3c47bab
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 14:06:11 -07:00
fab96c68e3 Work around Python 2.7 urllib2 bug
If the remote is using authenticated HTTP, but does not have
$GIT_URL/clone.bundle files in each repository, an initial sync
would fail about 8 projects in, because the library does not reset
its failure count after getting a 404.

Work around this by updating the retry counter ourselves.

The urllib2 library is also not thread-safe. Make it somewhat
safer by wrapping the critical section with a lock.

Change-Id: I886e2750ef4793cbe2150c3b5396eb9f10974f7f
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 12:18:07 -07:00
bf1fbb20ab Fix AttributeError: 'HTTPError' object has no attribute 'reason'
Not every version of urllib2 supplies a reason object on the
HTTPError exception that it throws from urlopen().  Work around
this by using str(e) instead and hope the string formatting includes
sufficient information.

Change-Id: I0f4586dba0aa7152691b2371627c951f91fdfc8d
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 09:31:58 -07:00
29472463ba Work around Python 2.7 failure to initialize base class
urllib2 returns a malformed HTTPError object in certain situations.
For example, urllib2 has a couple of places where it creates an
HTTPError object with no fp:

  if self.retried > 5:
    # retry sending the username:password 5 times before failing.
    raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                    headers, None)

When it does that, HTTPError's ctor doesn't call through to
addinfourl's ctor:

  # The addinfourl classes depend on fp being a valid file
  # object.  In some cases, the HTTPError may not have a valid
  # file object.  If this happens, the simplest workaround is to
  # not initialize the base classes.
  if fp is not None:
    self.__super_init(fp, hdrs, url, code)

Which means the 'headers' slot in addinfourl is not initialized and
info() fails.  It is completely insane that urllib2 decides not to
initialize its own base class sometimes.

Change-Id: I32a0d738f71bdd7d38d86078b71d9001e26f1ec3
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-11 09:24:07 -07:00
c325dc35f6 sync: Fetch after applying bundle and retry after errors
After a $GIT_URL/clone.bundle has been applied to the new local
repository, perform an incremental fetch using `git fetch` to ensure
the local repository is up-to-date. This allows the hosting server
to offer stale /clone.bundle files to bootstrap a new client.

If a single git fetch fails, it may succeed when retried after a short
delay.  Transient failures are typical in environments where the
remote Git server happens to have limits on how many requests it
can serve at once (the anonymous git daemon, or an HTTP server).
Wait a randomized delay between 30 and 45 seconds and retry the
failed project once.  This delay gives the site time to recover
from a transient traffic spike, and the randomization makes it less
likely that a spike occurs again from all of the same clients.

Change-Id: I97fb0fcb33630fb78ac1a21d1a4a3e2268ab60c0
Signed-off-by: Shawn O. Pearce <sop@google.com>
2011-10-03 08:30:24 -07:00
6 changed files with 170 additions and 108 deletions

View File

@ -26,7 +26,6 @@ import time
import urllib2
from signal import SIGTERM
from urllib2 import urlopen, HTTPError
from error import GitError, UploadError
from trace import Trace
@ -576,9 +575,19 @@ class Remote(object):
self._review_protocol = info[0]
self._review_host = info[1]
self._review_port = info[2]
elif 'REPO_HOST_PORT_INFO' in os.environ:
info = os.environ['REPO_HOST_PORT_INFO']
self._review_protocol = 'ssh'
self._review_host = info.split(" ")[0]
self._review_port = info.split(" ")[1]
REVIEW_CACHE[u] = (
self._review_protocol,
self._review_host,
self._review_port)
else:
try:
info = urlopen(u).read()
info = urllib2.urlopen(u).read()
if info == 'NOT_AVAILABLE':
raise UploadError('%s: SSH disabled' % self.review)
if '<' in info:
@ -590,15 +599,15 @@ class Remote(object):
self._review_protocol = 'ssh'
self._review_host = info.split(" ")[0]
self._review_port = info.split(" ")[1]
except urllib2.URLError, e:
raise UploadError('%s: %s' % (self.review, e.reason[1]))
except HTTPError, e:
except urllib2.HTTPError, e:
if e.code == 404:
self._review_protocol = 'http-post'
self._review_host = None
self._review_port = None
else:
raise UploadError('Upload over ssh unavailable')
raise UploadError('Upload over SSH unavailable')
except urllib2.URLError, e:
raise UploadError('%s: %s' % (self.review, str(e)))
REVIEW_CACHE[u] = (
self._review_protocol,

20
main.py
View File

@ -273,6 +273,24 @@ class _UserAgentHandler(urllib2.BaseHandler):
req.add_header('User-Agent', _UserAgent())
return req
# Basic-auth handler for urllib2 that clears its own retry state whenever
# handling an authentication challenge raises, so one failed request does
# not leave the handler with a stale retry count for later requests.
class _BasicAuthHandler(urllib2.HTTPBasicAuthHandler):
def http_error_auth_reqed(self, authreq, host, req, headers):
try:
# Replace req.add_header with a wrapper that removes every '\n' from
# the header value before delegating to the original method.
# NOTE(review): presumably protects against a newline in the encoded
# credentials -- confirm. The wrapper is not restored afterwards.
old_add_header = req.add_header
def _add_header(name, val):
val = val.replace('\n', '')
old_add_header(name, val)
req.add_header = _add_header
return urllib2.AbstractBasicAuthHandler.http_error_auth_reqed(
self, authreq, host, req, headers)
except:
# On any error, reset the retry state and re-raise. Prefer the
# reset_retry_count() method when this urllib2 has one; otherwise
# zero the 'retried' attribute directly if it exists and is non-zero.
reset = getattr(self, 'reset_retry_count', None)
if reset is not None:
reset()
elif getattr(self, 'retried', None):
self.retried = 0
raise
def init_http():
handlers = [_UserAgentHandler()]
@ -287,7 +305,7 @@ def init_http():
pass
except IOError:
pass
handlers.append(urllib2.HTTPBasicAuthHandler(mgr))
handlers.append(_BasicAuthHandler(mgr))
if 'http_proxy' in os.environ:
url = os.environ['http_proxy']

View File

@ -46,16 +46,20 @@ class _XmlRemote(object):
self.fetchUrl = fetch
self.manifestUrl = manifestUrl
self.reviewUrl = review
self.resolvedFetchUrl = self._resolveFetchUrl()
def ToRemoteSpec(self, projectName):
url = self.fetchUrl.rstrip('/') + '/' + projectName + '.git'
def _resolveFetchUrl(self):
url = self.fetchUrl.rstrip('/')
manifestUrl = self.manifestUrl.rstrip('/')
# urljoin will get confused if there is no scheme in the base url
# ie, if manifestUrl is of the form <hostname:port>
if manifestUrl.find(':') != manifestUrl.find('/') - 1:
manifestUrl = 'gopher://' + manifestUrl
url = urlparse.urljoin(manifestUrl, url)
url = re.sub(r'^gopher://', '', url)
return re.sub(r'^gopher://', '', url)
def ToRemoteSpec(self, projectName):
url = self.resolvedFetchUrl.rstrip('/') + '/' + projectName
return RemoteSpec(self.name, url, self.reviewUrl)
class XmlManifest(object):
@ -368,7 +372,7 @@ class XmlManifest(object):
raise ManifestParseError, 'refusing to mirror %s' % m_url
if self._default and self._default.remote:
url = self._default.remote.fetchUrl
url = self._default.remote.resolvedFetchUrl
if not url.endswith('/'):
url += '/'
if m_url.startswith(url):

View File

@ -16,12 +16,24 @@ import traceback
import errno
import filecmp
import os
import random
import re
import shutil
import stat
import sys
import time
import urllib2
try:
import threading as _threading
except ImportError:
import dummy_threading as _threading
try:
from os import SEEK_END
except ImportError:
SEEK_END = 2
from color import Coloring
from git_command import GitCommand
from git_config import GitConfig, IsId, GetSchemeFromUrl
@ -32,6 +44,8 @@ from progress import Progress
from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
_urllib_lock = _threading.Lock()
def _lwrite(path, content):
lock = '%s.lock' % path
@ -894,9 +908,25 @@ class Project(object):
is_new = not self.Exists
if is_new:
self._InitGitDir()
self._InitRemote()
if not self._RemoteFetch(initial=is_new, quiet=quiet):
if is_new:
alt = os.path.join(self.gitdir, 'objects/info/alternates')
try:
fd = open(alt, 'rb')
try:
alt_dir = fd.readline().rstrip()
finally:
fd.close()
except IOError:
alt_dir = None
else:
alt_dir = None
if alt_dir is None and self._ApplyCloneBundle(initial=is_new, quiet=quiet):
is_new = False
if not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir):
return False
#Check that the requested ref was found after fetch
@ -1307,7 +1337,8 @@ class Project(object):
def _RemoteFetch(self, name=None, tag=None,
initial=False,
quiet=False):
quiet=False,
alt_dir=None):
if not name:
name = self.remote.name
@ -1316,29 +1347,9 @@ class Project(object):
if remote.PreConnectFetch():
ssh_proxy = True
bundle_dst = os.path.join(self.gitdir, 'clone.bundle')
bundle_tmp = os.path.join(self.gitdir, 'clone.bundle.tmp')
use_bundle = False
if os.path.exists(bundle_dst) or os.path.exists(bundle_tmp):
use_bundle = True
if initial:
alt = os.path.join(self.gitdir, 'objects/info/alternates')
try:
fd = open(alt, 'rb')
try:
ref_dir = fd.readline()
if ref_dir and ref_dir.endswith('\n'):
ref_dir = ref_dir[:-1]
finally:
fd.close()
except IOError, e:
ref_dir = None
if ref_dir and 'objects' == os.path.basename(ref_dir):
if use_bundle:
use_bundle = False
ref_dir = os.path.dirname(ref_dir)
if alt_dir and 'objects' == os.path.basename(alt_dir):
ref_dir = os.path.dirname(alt_dir)
packed_refs = os.path.join(self.gitdir, 'packed-refs')
remote = self.GetRemote(name)
@ -1374,10 +1385,8 @@ class Project(object):
old_packed += line
_lwrite(packed_refs, tmp_packed)
else:
ref_dir = None
use_bundle = True
alt_dir = None
cmd = ['fetch']
@ -1386,94 +1395,119 @@ class Project(object):
depth = self.manifest.manifestProject.config.GetString('repo.depth')
if depth and initial:
cmd.append('--depth=%s' % depth)
use_bundle = False
if quiet:
cmd.append('--quiet')
if not self.worktree:
cmd.append('--update-head-ok')
cmd.append(name)
if tag is not None:
cmd.append('tag')
cmd.append(tag)
if use_bundle and not os.path.exists(bundle_dst):
bundle_url = remote.url + '/clone.bundle'
bundle_url = GitConfig.ForUser().UrlInsteadOf(bundle_url)
if GetSchemeFromUrl(bundle_url) in ('http', 'https'):
use_bundle = self._FetchBundle(
bundle_url,
bundle_tmp,
bundle_dst,
quiet=quiet)
else:
use_bundle = False
if use_bundle:
if not quiet:
cmd.append('--quiet')
cmd.append(bundle_dst)
for f in remote.fetch:
cmd.append(str(f))
cmd.append('refs/tags/*:refs/tags/*')
else:
cmd.append(name)
if tag is not None:
cmd.append('tag')
cmd.append(tag)
ok = GitCommand(self,
cmd,
bare = True,
ssh_proxy = ssh_proxy).Wait() == 0
ok = False
for i in range(2):
if GitCommand(self, cmd, bare=True, ssh_proxy=ssh_proxy).Wait() == 0:
ok = True
break
time.sleep(random.randint(30, 45))
if initial:
if ref_dir:
if alt_dir:
if old_packed != '':
_lwrite(packed_refs, old_packed)
else:
os.remove(packed_refs)
self.bare_git.pack_refs('--all', '--prune')
return ok
# Try to seed this project from <remote url>/clone.bundle.
#
# Returns True only when a bundle file was present (or was downloaded) and
# `git fetch` from it exited with status 0. Returns False when the bundle
# is skipped, cannot be obtained, or the fetch fails.
def _ApplyCloneBundle(self, initial=False, quiet=False):
# Skip the bundle on an initial sync when repo.depth is configured.
if initial and self.manifest.manifestProject.config.GetString('repo.depth'):
return False
remote = self.GetRemote(self.remote.name)
bundle_url = remote.url + '/clone.bundle'
# NOTE(review): presumably applies the user's git URL rewrite rules --
# confirm against GitConfig.UrlInsteadOf.
bundle_url = GitConfig.ForUser().UrlInsteadOf(bundle_url)
# Only http and https bundle URLs are attempted.
if GetSchemeFromUrl(bundle_url) not in ('http', 'https'):
return False
bundle_dst = os.path.join(self.gitdir, 'clone.bundle')
bundle_tmp = os.path.join(self.gitdir, 'clone.bundle.tmp')
exist_dst = os.path.exists(bundle_dst)
exist_tmp = os.path.exists(bundle_tmp)
# On a non-initial sync, continue only if a bundle file or a temporary
# download file is already present in gitdir.
if not initial and not exist_dst and not exist_tmp:
return False
# No finished bundle on disk: download it. The result of _FetchBundle is
# treated as "bundle_dst is now available".
if not exist_dst:
exist_dst = self._FetchBundle(bundle_url, bundle_tmp, bundle_dst, quiet)
if not exist_dst:
return False
# Build `git fetch <bundle file> <remote fetch specs> refs/tags/*:refs/tags/*`.
cmd = ['fetch']
if quiet:
cmd.append('--quiet')
if not self.worktree:
cmd.append('--update-head-ok')
cmd.append(bundle_dst)
for f in remote.fetch:
cmd.append(str(f))
cmd.append('refs/tags/*:refs/tags/*')
ok = GitCommand(self, cmd, bare=True).Wait() == 0
# Remove the bundle and any temporary file whether or not the fetch
# succeeded.
if os.path.exists(bundle_dst):
os.remove(bundle_dst)
if os.path.exists(bundle_tmp):
os.remove(bundle_tmp)
return ok
def _FetchBundle(self, srcUrl, tmpPath, dstPath, quiet=False):
def _FetchBundle(self, srcUrl, tmpPath, dstPath, quiet):
keep = True
done = False
dest = open(tmpPath, 'a+b')
try:
dest.seek(0, os.SEEK_END)
dest.seek(0, SEEK_END)
pos = dest.tell()
req = urllib2.Request(srcUrl)
if pos > 0:
req.add_header('Range', 'bytes=%d-' % pos)
_urllib_lock.acquire()
try:
r = urllib2.urlopen(req)
except urllib2.HTTPError, e:
if e.code == 404:
keep = False
return False
elif e.info()['content-type'] == 'text/plain':
try:
msg = e.read()
if len(msg) > 0 and msg[-1] == '\n':
msg = msg[0:-1]
msg = ' (%s)' % msg
except:
msg = ''
else:
try:
from BaseHTTPServer import BaseHTTPRequestHandler
res = BaseHTTPRequestHandler.responses[e.code]
msg = ' (%s: %s)' % (res[0], res[1])
except:
msg = ''
raise DownloadError('HTTP %s%s' % (e.code, msg))
except urllib2.URLError, e:
raise DownloadError('%s (%s)' % (e.reason, req.get_host()))
req = urllib2.Request(srcUrl)
if pos > 0:
req.add_header('Range', 'bytes=%d-' % pos)
try:
r = urllib2.urlopen(req)
except urllib2.HTTPError, e:
def _content_type():
try:
return e.info()['content-type']
except:
return None
if e.code == 404:
keep = False
return False
elif _content_type() == 'text/plain':
try:
msg = e.read()
if len(msg) > 0 and msg[-1] == '\n':
msg = msg[0:-1]
msg = ' (%s)' % msg
except:
msg = ''
else:
try:
from BaseHTTPServer import BaseHTTPRequestHandler
res = BaseHTTPRequestHandler.responses[e.code]
msg = ' (%s: %s)' % (res[0], res[1])
except:
msg = ''
raise DownloadError('HTTP %s%s' % (e.code, msg))
except urllib2.URLError, e:
raise DownloadError('%s: %s ' % (req.get_host(), str(e)))
finally:
_urllib_lock.release()
p = None
try:

View File

@ -165,6 +165,7 @@ See 'repo help --all' for a complete list of recognized commands.
print >>sys.stderr, "repo: '%s' is not a repo command." % name
sys.exit(1)
cmd.manifest = self.manifest
self._PrintCommandHelp(cmd)
else:

View File

@ -195,15 +195,11 @@ later is required to fix a server side protocol bug.
fetched.add(project.gitdir)
pm.update()
except BaseException, e:
# Notify the _Fetch() function about all errors.
except _FetchError:
err_event.set()
# If we got our own _FetchError, we don't want a stack trace.
# However, if we got something else (something in Sync_NetworkHalf?),
# we'd like one (so re-raise after we've set err_event).
if not isinstance(e, _FetchError):
raise
except:
err_event.set()
raise
finally:
if did_lock:
lock.release()