Work around Python 2.7 urllib2 bug

If the remote uses authenticated HTTP but does not provide a
$GIT_URL/clone.bundle file in each repository, an initial sync
would fail about 8 projects in, because the library does not reset
its failure count after receiving a 404.

Work around this by updating the retry counter ourselves.

The urllib2 library is also not thread-safe. Make it somewhat
safer by wrapping the critical section with a lock.

Change-Id: I886e2750ef4793cbe2150c3b5396eb9f10974f7f
Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
Shawn O. Pearce 2011-10-11 12:00:38 -07:00
parent bf1fbb20ab
commit fab96c68e3
2 changed files with 53 additions and 33 deletions

11
main.py
View File

@ -273,6 +273,15 @@ class _UserAgentHandler(urllib2.BaseHandler):
req.add_header('User-Agent', _UserAgent()) req.add_header('User-Agent', _UserAgent())
return req return req
class _BasicAuthHandler(urllib2.HTTPBasicAuthHandler):
  """HTTP basic-auth handler that clears the retry counter on failure.

  Works around urllib2 not resetting its count of failed attempts when an
  authenticated request ends in an error (for example a 404), which would
  otherwise make later, unrelated requests fail once the count is exhausted.
  """

  def http_error_auth_reqed(self, authreq, host, req, headers):
    """Delegate to urllib2 and reset the retry counter if it raises.

    Args:
      authreq: name of the authentication challenge header.
      host: host the request was sent to.
      req: the urllib2 request being retried.
      headers: response headers containing the challenge.

    Returns:
      Whatever the urllib2 base implementation returns.

    Raises:
      Re-raises, unchanged, any exception raised by the base implementation.
    """
    try:
      return urllib2.AbstractBasicAuthHandler.http_error_auth_reqed(
          self, authreq, host, req, headers)
    except:
      # The bare except is deliberate: the exception is always re-raised
      # below, so nothing is swallowed; we only need the counter cleanup.
      reset = getattr(self, 'reset_retry_count', None)
      if reset is not None:
        reset()
      elif getattr(self, 'retried', None):
        # urllib2 versions without reset_retry_count(): clear the counter
        # directly so the cleanup cannot mask the original error with an
        # AttributeError.
        self.retried = 0
      raise
def init_http(): def init_http():
handlers = [_UserAgentHandler()] handlers = [_UserAgentHandler()]
@ -287,7 +296,7 @@ def init_http():
pass pass
except IOError: except IOError:
pass pass
handlers.append(urllib2.HTTPBasicAuthHandler(mgr)) handlers.append(_BasicAuthHandler(mgr))
if 'http_proxy' in os.environ: if 'http_proxy' in os.environ:
url = os.environ['http_proxy'] url = os.environ['http_proxy']

View File

@ -24,6 +24,11 @@ import sys
import time import time
import urllib2 import urllib2
try:
import threading as _threading
except ImportError:
import dummy_threading as _threading
from color import Coloring from color import Coloring
from git_command import GitCommand from git_command import GitCommand
from git_config import GitConfig, IsId, GetSchemeFromUrl from git_config import GitConfig, IsId, GetSchemeFromUrl
@ -34,6 +39,8 @@ from progress import Progress
from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M from git_refs import GitRefs, HEAD, R_HEADS, R_TAGS, R_PUB, R_M
_urllib_lock = _threading.Lock()
def _lwrite(path, content): def _lwrite(path, content):
lock = '%s.lock' % path lock = '%s.lock' % path
@ -1458,40 +1465,44 @@ class Project(object):
dest.seek(0, os.SEEK_END) dest.seek(0, os.SEEK_END)
pos = dest.tell() pos = dest.tell()
req = urllib2.Request(srcUrl) _urllib_lock.acquire()
if pos > 0:
req.add_header('Range', 'bytes=%d-' % pos)
try: try:
r = urllib2.urlopen(req) req = urllib2.Request(srcUrl)
except urllib2.HTTPError, e: if pos > 0:
def _content_type(): req.add_header('Range', 'bytes=%d-' % pos)
try:
return e.info()['content-type']
except:
return None
if e.code == 404: try:
keep = False r = urllib2.urlopen(req)
return False except urllib2.HTTPError, e:
elif _content_type() == 'text/plain': def _content_type():
try: try:
msg = e.read() return e.info()['content-type']
if len(msg) > 0 and msg[-1] == '\n': except:
msg = msg[0:-1] return None
msg = ' (%s)' % msg
except: if e.code == 404:
msg = '' keep = False
else: return False
try: elif _content_type() == 'text/plain':
from BaseHTTPServer import BaseHTTPRequestHandler try:
res = BaseHTTPRequestHandler.responses[e.code] msg = e.read()
msg = ' (%s: %s)' % (res[0], res[1]) if len(msg) > 0 and msg[-1] == '\n':
except: msg = msg[0:-1]
msg = '' msg = ' (%s)' % msg
raise DownloadError('HTTP %s%s' % (e.code, msg)) except:
except urllib2.URLError, e: msg = ''
raise DownloadError('%s: %s ' % (req.get_host(), str(e))) else:
try:
from BaseHTTPServer import BaseHTTPRequestHandler
res = BaseHTTPRequestHandler.responses[e.code]
msg = ' (%s: %s)' % (res[0], res[1])
except:
msg = ''
raise DownloadError('HTTP %s%s' % (e.code, msg))
except urllib2.URLError, e:
raise DownloadError('%s: %s ' % (req.get_host(), str(e)))
finally:
_urllib_lock.release()
p = None p = None
try: try: