mirror of
https://gerrit.googlesource.com/git-repo
synced 2025-01-04 16:14:25 +00:00
346 lines
8.8 KiB
Python
346 lines
8.8 KiB
Python
|
#
|
||
|
# Copyright (C) 2008 The Android Open Source Project
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
import stat
|
||
|
import struct
|
||
|
import zlib
|
||
|
import cStringIO
|
||
|
|
||
|
from import_ext import ImportExternal
|
||
|
from error import ImportError
|
||
|
|
||
|
class ImportZip(ImportExternal):
  """Importer that streams a zip (or jar) archive into a Project.

  The archive is read front to back exactly once: every local file
  record is unpacked as it is encountered, then the central directory
  is walked to apply file modes, and finally the end-of-archive record
  is verified.

  NOTE(review): _OpenUrl, _UnpackOneFile and _SetFileMode are not
  defined in this class; presumably they are inherited from
  ImportExternal -- confirm against that class.
  """

  @classmethod
  def CanAccept(cls, url):
    """Tell whether url names an archive this importer can unpack.

    Only the (case-sensitive) '.zip' and '.jar' suffixes are accepted.
    """
    return url.endswith('.zip') or url.endswith('.jar')

  def _UnpackFiles(self):
    """Unpack every entry of the archive behind the opened URL.

    Raises ImportError when the URL does not carry a zip/jar suffix.
    The URL stream is always closed, even when unpacking fails.
    """
    url_fd, url = self._OpenUrl()
    try:
      if not self.__class__.CanAccept(url):
        raise ImportError('non-zip file extension: %s' % url)

      archive = _ZipFile(url_fd)

      # Pass 1: file contents, in the order they appear in the stream.
      for entry in archive.FileRecords():
        data = archive.Open(entry).read()

        # Text-looking content is normalized to LF line endings; the
        # size handed on below always matches the final payload.
        if data and _SafeCRLF(data):
          data = data.replace('\r\n', '\n')

        self._UnpackOneFile(entry.mode,
                            len(data),
                            entry.name,
                            cStringIO.StringIO(data))
        archive.Close(entry)

      # Pass 2: the central directory supplies the per-file modes.
      for entry in archive.CentralDirectory():
        self._SetFileMode(entry.name, entry.mode)

      archive.CheckTail()
    finally:
      url_fd.close()
|
||
|
|
||
|
|
||
|
def _SafeCRLF(data):
  """Decide whether a CRLF->LF conversion of data is reasonably safe.

  A NUL byte anywhere except the very last position marks the data as
  probably binary, so it is left alone.  A single trailing NUL is
  tolerated because at least one source ZIP file ships text files
  that end this way.

  Otherwise the conversion is considered safe only when the data has
  at least one LF and every LF is immediately preceded by a CR.  In
  that case replacing each CR LF pair with LF, and each LF with CR LF,
  are exact inverses of each other.

  Returns True when the conversion should be performed.
  """
  first_nul = data.find('\0')
  if 0 <= first_nul < len(data) - 1:
    return False

  # CR LF pairs cannot overlap, so the pair count equals the number of
  # LFs that have a CR in front of them.  Any difference from the total
  # LF count means at least one bare LF is present.
  total_lf = data.count('\n')
  if total_lf == 0:
    return False
  return data.count('\r\n') == total_lf
|
||
|
|
||
|
class _ZipFile(object):
  """Streaming parser that walks a zip file front to back.

  The underlying stream is wrapped once so that a record type read by
  one stage can be pushed back for the next stage.  Callers are
  expected to consume FileRecords(), then CentralDirectory(), then
  call CheckTail().
  """

  def __init__(self, fd):
    self._fd = _UngetStream(fd)

  def FileRecords(self):
    """Return an iterator over the local file headers in the stream."""
    return _FileIter(self._fd)

  def CentralDirectory(self):
    """Return an iterator over the central directory records."""
    return _CentIter(self._fd)

  def CheckTail(self):
    """Verify that the next record is the end of central directory.

    Raises ImportError if the stream is exhausted or any other record
    type is found.
    """
    rec_type = self._ReadRecordType()
    if rec_type != 0x06054b50:  # end of central directory
      raise ImportError('zip record %x unsupported' % rec_type)

  def Open(self, entry):
    """Return a readable stream over the content of entry.

    Deflated entries are inflated on the fly.  Stored entries are read
    by their declared uncompressed size; a stored entry that also uses
    a trailing data descriptor is rejected with ImportError.
    """
    if entry.is_compressed:
      return _InflateStream(self._fd)
    if entry.has_trailer:
      raise ImportError('unable to extract streamed zip')
    return _FixedLengthStream(self._fd, entry.uncompressed_size)

  def Close(self, entry):
    """Skip the data descriptor that follows entry, if it has one.

    Raises ImportError if the stream ends inside the descriptor.
    """
    if not entry.has_trailer:
      return

    if self._ReadRecordType() == 0x08074b50:
      # Not a formal type marker, but commonly seen in zips as the
      # data descriptor signature; three 4 byte fields follow it.
      rest = 12
    else:
      # No signature, so the 4 bytes just read were the first field
      # of the descriptor and only two more fields remain.
      rest = 8

    if len(self._fd.read(rest)) != rest:
      raise ImportError('truncated zip: incomplete data descriptor')

  def _ReadRecordType(self):
    """Read the 4 byte little-endian record type from the stream.

    A short read means the archive is truncated; report it as an
    ImportError instead of letting struct.error escape.
    """
    buf = self._fd.read(4)
    if len(buf) != 4:
      raise ImportError('truncated zip: incomplete record type')
    return struct.unpack('<I', buf)[0]
|
||
|
|
||
|
|
||
|
class _FileIter(object):
  """Iterator over the local file header records of a zip stream.

  fd must provide read(size) and unread(buf).  Iteration stops at the
  first record that is not a local file header; the 4 byte type of
  that record is pushed back so the next parsing stage can read it.
  """

  def __init__(self, fd):
    self._fd = fd

  def __iter__(self):
    return self

  def next(self):
    """Return the next _FileHeader, with name (and mode) filled in.

    Raises StopIteration at the first non-file record and ImportError
    when the stream ends in the middle of a record.
    """
    fd = self._fd

    type_buf = fd.read(4)
    if len(type_buf) != 4:
      # A short read would otherwise surface as a bare struct.error.
      raise ImportError('truncated zip: incomplete record type')

    if struct.unpack('<I', type_buf)[0] != 0x04034b50:  # local file header
      fd.unread(type_buf)
      raise StopIteration()

    raw = fd.read(26)
    if len(raw) != 26:
      raise ImportError('truncated zip: incomplete local file header')

    rec = _FileHeader(raw)
    rec.name = fd.read(rec.name_len)
    if len(rec.name) != rec.name_len:
      raise ImportError('truncated zip: incomplete file name')
    fd.read(rec.extra_len)  # the extra field is skipped, never used

    if rec.name.endswith('/'):
      # A trailing slash marks a directory entry: drop the slash and
      # give it directory type with rwxrwxrwx permissions.
      rec.name = rec.name[:-1]
      rec.mode = (stat.S_IFDIR
                  | stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    return rec
|
||
|
|
||
|
|
||
|
class _FileHeader(object):
  """Decoded local file header of a single archive entry.

  raw_bin is the 26 byte fixed-size part of the header, i.e. what
  follows the 4 byte record type.  Fields, in unpack order:

    0  version needed to extract   2 bytes
    1  general purpose bit flag    2 bytes
    2  compression method          2 bytes
    3  last mod file time          2 bytes
    4  last mod file date          2 bytes
    5  crc-32                      4 bytes
    6  compressed size             4 bytes
    7  uncompressed size           4 bytes
    8  file name length            2 bytes
    9  extra field length          2 bytes

  Raises ImportError for any compression method other than 0 (stored)
  or 8 (deflated).
  """

  def __init__(self, raw_bin):
    (_version, flags, method, _mod_time, _mod_date, _crc,
     compressed_size, uncompressed_size,
     name_len, extra_len) = struct.unpack('<5H3I2H', raw_bin)

    if method not in (0, 8):
      raise ImportError('unrecognized compression format')
    self.is_compressed = method == 8

    # Bit 3 of the flags announces a data descriptor after the content.
    self.has_trailer = bool(flags & (1 << 3))

    self.compressed_size = compressed_size
    self.uncompressed_size = uncompressed_size
    self.name_len = name_len
    self.extra_len = extra_len

    # Default mode: regular file, rw-r--r--.
    self.mode = (stat.S_IFREG
                 | stat.S_IRUSR | stat.S_IWUSR
                 | stat.S_IRGRP | stat.S_IROTH)
|
||
|
|
||
|
|
||
|
class _CentIter(object):
  """Iterator over the central directory records of a zip stream.

  fd must provide read(size) and unread(buf).  Iteration stops at the
  first record that is not a central directory entry; the 4 byte type
  of that record is pushed back so the caller can inspect it.
  """

  def __init__(self, fd):
    self._fd = fd

  def __iter__(self):
    return self

  def next(self):
    """Return the next _CentHeader, with name (and mode) filled in.

    Raises StopIteration at the first non-directory record and
    ImportError when the stream ends in the middle of a record.
    """
    fd = self._fd

    type_buf = fd.read(4)
    if len(type_buf) != 4:
      # A short read would otherwise surface as a bare struct.error.
      raise ImportError('truncated zip: incomplete record type')

    if struct.unpack('<I', type_buf)[0] != 0x02014b50:  # central directory
      fd.unread(type_buf)
      raise StopIteration()

    raw = fd.read(42)
    if len(raw) != 42:
      raise ImportError('truncated zip: incomplete central directory header')

    rec = _CentHeader(raw)
    rec.name = fd.read(rec.name_len)
    if len(rec.name) != rec.name_len:
      raise ImportError('truncated zip: incomplete file name')
    # The extra field and the file comment are skipped, never used.
    fd.read(rec.extra_len)
    fd.read(rec.comment_len)

    if rec.name.endswith('/'):
      # A trailing slash marks a directory entry: drop the slash and
      # give it directory type with rwxrwxrwx permissions.
      rec.name = rec.name[:-1]
      rec.mode = (stat.S_IFDIR
                  | stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    return rec
|
||
|
|
||
|
|
||
|
class _CentHeader(object):
  """Decoded central directory header of a single archive entry.

  raw_bin is the 42 byte fixed-size part of the record, i.e. what
  follows the 4 byte record type.  Fields, in unpack order:

     0  version made by                   2 bytes
     1  version needed to extract         2 bytes
     2  general purpose bit flag          2 bytes
     3  compression method                2 bytes
     4  last mod file time                2 bytes
     5  last mod file date                2 bytes
     6  crc-32                            4 bytes
     7  compressed size                   4 bytes
     8  uncompressed size                 4 bytes
     9  file name length                  2 bytes
    10  extra field length                2 bytes
    11  file comment length               2 bytes
    12  disk number start                 2 bytes
    13  internal file attributes          2 bytes
    14  external file attributes          4 bytes
    15  relative offset of local header   4 bytes
  """

  def __init__(self, raw_bin):
    fields = struct.unpack('<6H3I5H2I', raw_bin)
    made_by = fields[0]
    external_attrs = fields[14]

    self.name_len, self.extra_len, self.comment_len = fields[9:12]

    # The high byte of "version made by" identifies the host system;
    # for UNIX (3) the file mode is kept in the upper 16 bits of the
    # external attributes.
    host_is_unix = (made_by & 0xff00) == 0x0300
    if host_is_unix:
      self.mode = external_attrs >> 16
    else:
      # No usable mode recorded: regular file, rw-r--r--.
      self.mode = (stat.S_IFREG
                   | stat.S_IRUSR | stat.S_IWUSR
                   | stat.S_IRGRP | stat.S_IROTH)
|
||
|
|
||
|
|
||
|
class _UngetStream(object):
  """File-like reader that lets data be pushed back onto the stream.

  Data is pulled from the wrapped fd in 2048 byte requests and passed
  through the _Inflate hook before being handed out.  Subclasses
  override _Inflate to transform or limit the data; a false result
  from the hook is treated as end of stream.
  """

  def __init__(self, fd):
    self._fd = fd
    self._buf = None  # data already fetched but not yet handed out

  def read(self, size=-1):
    """Read up to size bytes, or everything until EOF if size < 0.

    Fewer bytes than requested are returned when the stream ends.
    """
    chunks = []
    try:
      if size < 0:
        # Keep pulling until _ReadChunk signals EOF.
        while True:
          self._ReadChunk(chunks, 2048)
      else:
        self._ReadChunk(chunks, size)
    except EOFError:
      pass

    # A single piece is returned as is, without joining.
    if len(chunks) == 1:
      return chunks[0]
    return ''.join(chunks)

  def unread(self, buf):
    """Push buf back so that it is returned first by the next read."""
    pending = self._buf
    if pending:
      self._buf = buf + pending
    else:
      self._buf = buf

  def _ReadChunk(self, r, size):
    """Append pieces totalling size bytes to the list r.

    Raises EOFError when the stream ends first; pieces collected so
    far stay in r.  Whatever was fetched but not consumed is kept for
    the next call, even on error.
    """
    pending = self._buf
    wanted = size
    try:
      while wanted > 0:
        if not pending:
          pending = self._Inflate(self._fd.read(2048))
          if not pending:
            raise EOFError()

        take = min(wanted, len(pending))
        r.append(pending[:take])
        pending = pending[take:]
        wanted -= take
    finally:
      self._buf = pending

  def _Inflate(self, b):
    """Hook applied to every block read from fd; identity here."""
    return b
|
||
|
|
||
|
|
||
|
class _FixedLengthStream(_UngetStream):
  """File-like reader exposing exactly `have` bytes of another stream.

  fd must support unread(): anything fetched beyond the fixed length
  is pushed back onto it.
  """

  def __init__(self, fd, have):
    _UngetStream.__init__(self, fd)
    self._have = have  # bytes still to be handed out

  def _Inflate(self, b):
    """Clip b to the bytes still owed; return None once none remain."""
    remaining = self._have

    if remaining == 0:
      # Quota used up: the whole block belongs to whoever reads next.
      self._fd.unread(b)
      return None

    if remaining < len(b):
      # Keep our share and hand the surplus back to the parent stream.
      self._fd.unread(b[remaining:])
      b = b[:remaining]

    self._have = remaining - len(b)
    return b
|
||
|
|
||
|
|
||
|
class _InflateStream(_UngetStream):
  """File-like reader that inflates another stream as it is read.

  fd must support unread(): input that the decompressor did not use
  is pushed back onto it.
  """

  def __init__(self, fd):
    _UngetStream.__init__(self, fd)
    # Negative window bits: the input is a raw deflate stream.
    self._z = zlib.decompressobj(-zlib.MAX_WBITS)

  def _Inflate(self, b):
    """Inflate b; return None once the compressed stream has ended."""
    inflater = self._z
    if not inflater:
      # Already finished: the block belongs to whoever reads next.
      self._fd.unread(b)
      return None

    b = inflater.decompress(b)

    leftover = inflater.unconsumed_tail
    if leftover != '':
      self._fd.unread(leftover)
    else:
      trailing = inflater.unused_data
      if trailing != '':
        # Input past the end of the compressed data: give it back and
        # retire the decompressor.
        self._fd.unread(trailing)
        self._z = None
    return b
|