#
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import stat
import struct
import zlib
import cStringIO

from import_ext import ImportExternal
from error import ImportError


class ImportZip(ImportExternal):
    """Streams a zip file from the network directly
    into a Project's Git repository.
    """

    @classmethod
    def CanAccept(cls, url):
        """Can this importer read and unpack the data stored at url?

        Returns True when url ends in '.zip' or '.jar', False otherwise.
        """
        return url.endswith(('.zip', '.jar'))

    def _UnpackFiles(self):
        """Unpack every file record of the archive, then apply file modes.

        The stream and its url come from self._OpenUrl(); the stream is
        always closed on exit.  Each record is read fully into memory,
        CRLF-normalized when _SafeCRLF() allows it, and handed to
        self._UnpackOneFile().  Afterwards the central directory entries
        are passed to self._SetFileMode() and the archive tail is checked.

        Raises:
            ImportError: (the project's error.ImportError) if the url
                does not have a zip/jar extension.
        """
        url_fd, url = self._OpenUrl()
        try:
            if not self.__class__.CanAccept(url):
                raise ImportError('non-zip file extension: %s' % url)

            # Named 'archive' rather than 'zip' to avoid shadowing the
            # builtin.
            archive = _ZipFile(url_fd)
            for entry in archive.FileRecords():
                data = archive.Open(entry).read()

                if data and _SafeCRLF(data):
                    data = data.replace('\r\n', '\n')
                # Size is taken after any conversion so that it matches
                # the bytes actually handed to _UnpackOneFile.
                sz = len(data)

                fd = cStringIO.StringIO(data)
                self._UnpackOneFile(entry.mode, sz, entry.name, fd)
                archive.Close(entry)

            for entry in archive.CentralDirectory():
                self._SetFileMode(entry.name, entry.mode)

            archive.CheckTail()
        finally:
            url_fd.close()


def _SafeCRLF(data):
    r"""Is it reasonably safe to perform a CRLF->LF conversion?

    If the stream contains a NUL byte it is likely binary,
    and thus a CRLF->LF conversion may damage the stream.

    If the only NUL is in the last position of the stream,
    but it otherwise can do a CRLF<->LF conversion we do
    the CRLF conversion anyway.  At least one source ZIP
    file has this structure in its source code.

    If every occurrence of a CR and LF is paired up as a
    CRLF pair then the conversion is safely bi-directional.
    s/\r\n/\n/g == s/\n/\r\n/g can convert between them.

    Returns True only if data holds at least one LF and every LF is
    immediately preceded by a CR.
    """
    # A NUL anywhere except the very last position marks the data as
    # binary.  find() reports the first NUL, so a first NUL in the last
    # position is necessarily the only one.
    nul = data.find('\0')
    if 0 <= nul < len(data) - 1:
        return False

    # Only LFs are inspected; a CR that is not followed by an LF does
    # not cause rejection.
    n_lf = 0
    last = 0
    while True:
        lf = data.find('\n', last)
        if lf < 0:
            break
        if lf == 0 or data[lf - 1] != '\r':
            return False
        last = lf + 1
        n_lf += 1
    # With no LF at all there is nothing to convert.
    return n_lf > 0


class _ZipFile(object):
    """Streaming iterator to parse a zip file on the fly.
    """

    def __init__(self, fd):
        # Wrapped so that parsers can push look-ahead bytes back.
        self._fd = _UngetStream(fd)

    def FileRecords(self):
        return _FileIter(self._fd)

    def CentralDirectory(self):
        return _CentIter(self._fd)

    def CheckTail(self):
        type_buf = self._fd.read(4)
        # NOTE(review): this copy of the file is damaged from here on.
        # Everything between the struct.unpack() call that followed the
        # read above and the tail end of a later "mode" computation was
        # lost (it looks like text between a '<' and a '>' was stripped).
        # The surviving residue, verbatim, is:
        #
        #     type = struct.unpack('> 16 else: self.mode = stat.S_IFREG | 0644
        #
        # The lost span presumably also held the definitions of
        # _FileIter, _CentIter, _ZipFile.Open and _ZipFile.Close, which
        # are referenced in this file but not defined in what remains.
        # Restore them from the upstream source; they are deliberately
        # NOT reconstructed here.  Until then this check fails loudly
        # rather than silently accepting any archive tail.
        raise NotImplementedError(
            'CheckTail: remainder of this method was lost from the source')


class _UngetStream(object):
    """File like object to read and rewind a stream.
    """

    def __init__(self, fd):
        self._fd = fd
        # Pushed-back / not yet consumed data; None or '' when empty.
        self._buf = None

    def read(self, size=-1):
        """Read up to size bytes, or everything until EOF if size < 0.

        May return fewer than size bytes (possibly '') at end of stream.
        """
        r = []
        try:
            if size >= 0:
                self._ReadChunk(r, size)
            else:
                # Keep pulling fixed-size chunks; _ReadChunk signals the
                # end of the stream by raising EOFError.
                while True:
                    self._ReadChunk(r, 2048)
        except EOFError:
            pass

        # Skip the join (and its copy) in the common single-piece case.
        if len(r) == 1:
            return r[0]
        return ''.join(r)

    def unread(self, buf):
        """Push buf back so that it is returned first by the next read."""
        b = self._buf
        if not b:
            self._buf = buf
        else:
            self._buf = buf + b

    def _ReadChunk(self, r, size):
        """Append pieces totalling size bytes to the list r.

        Raises EOFError if the stream ends first; pieces gathered up to
        that point remain in r.
        """
        b = self._buf
        try:
            while size > 0:
                if not b:
                    # Refill from the underlying stream, letting the
                    # subclass hook transform (or refuse) the raw data.
                    b = self._Inflate(self._fd.read(2048))
                    if not b:
                        raise EOFError()
                    continue

                use = min(size, len(b))
                r.append(b[:use])
                b = b[use:]
                size -= use
        finally:
            # Whatever was not consumed stays buffered for the next call.
            self._buf = b

    def _Inflate(self, b):
        """Hook for subclasses to transform raw data; identity here."""
        return b


class _FixedLengthStream(_UngetStream):
    """File like object to read a fixed length stream.
    """

    def __init__(self, fd, have):
        _UngetStream.__init__(self, fd)
        # Number of bytes that still belong to this stream.
        self._have = have

    def _Inflate(self, b):
        n = self._have
        if n == 0:
            # Nothing left for us: give the whole chunk back to the
            # underlying stream.  Returning None makes _ReadChunk raise
            # EOFError.
            self._fd.unread(b)
            return None

        if len(b) > n:
            # The chunk runs past our end; return the excess.
            self._fd.unread(b[n:])
            b = b[:n]
        self._have -= len(b)
        return b


class _InflateStream(_UngetStream):
    """Inflates the stream as it reads input.
    """

    def __init__(self, fd):
        _UngetStream.__init__(self, fd)
        # Negative window bits: raw deflate data, no zlib header.
        self._z = zlib.decompressobj(-zlib.MAX_WBITS)

    def _Inflate(self, b):
        z = self._z
        if not z:
            # The compressed stream already ended; the chunk belongs to
            # whatever follows, so hand it back untouched.
            self._fd.unread(b)
            return None

        b = z.decompress(b)
        if z.unconsumed_tail:
            # NOTE(review): decompress() is called without max_length,
            # so this is not expected to be non-empty; kept as in the
            # original.
            self._fd.unread(z.unconsumed_tail)
        elif z.unused_data:
            # Bytes past the end of the deflate stream: return them to
            # the underlying stream and stop decompressing.
            self._fd.unread(z.unused_data)
            self._z = None
        return b