from __future__ import absolute_import
from io import BytesIO
import struct
import subprocess
import zlib
import os
from zipfile import (
ZIP_STORED,
ZIP_DEFLATED,
)
from collections import OrderedDict
import mozpack.path as mozpath
from mozbuild.util import memoize
# Compression types, matching the values zipfile uses for the ZIP
# compression field.
JAR_STORED = ZIP_STORED
JAR_DEFLATED = ZIP_DEFLATED
# Non-standard compression type (not one of zipfile's); presumably a local
# extension — handled through the Brotli helper class below.
JAR_BROTLI = 0x81
# zlib window size; passed negated to zlib so raw deflate streams (no zlib
# header), as stored in ZIP archives, are produced/consumed.
MAX_WBITS = 15
class JarReaderError(Exception):
    '''Error type for jar reader errors.'''
class JarWriterError(Exception):
    '''Error type for jar writer errors.'''
class JarStruct(object):
    '''
    Helper used to define ZIP archive raw data structures. A structure is
    serialized as a magic number (uint32) followed by the fields listed in
    the subclass's STRUCT, an OrderedDict mapping field names to types.
    Supported types are 'uint32' and 'uint16'; any other type name must be
    the name of another field in the structure, making this field a
    variable-length byte string whose length is stored in that other field.

    Subclasses must define:
        MAGIC: the structure's magic number (signature).
        STRUCT: the OrderedDict described above.

    Size fields are never set directly; they are derived from the length of
    the variable-length field they describe when serializing.
    '''
    # struct-module format character and byte size for each fixed-size type.
    TYPE_MAPPING = {'uint32': ('I', 4), 'uint16': ('H', 2)}

    def __init__(self, data=None):
        '''
        Deserialize the given data into this structure's fields, or
        initialize empty fields if no data is given.
        '''
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of fields holding the size of some variable-length field.
        # NOTE: .values()/.items() replace the py2-only .itervalues()/
        # .iteritems() throughout, so this class runs on both py2 and py3.
        self.size_fields = set(t for t in self.STRUCT.values()
                               if t not in JarStruct.TYPE_MAPPING)
        self._values = {}
        if data:
            self._init_data(data)
        else:
            self._init_empty()

    def _init_data(self, data):
        '''
        Deserialize the given buffer (bytes or memoryview), checking the
        magic number and filling every non-size field.

        Raises JarReaderError when the magic number doesn't match.
        '''
        assert data is not None
        self.signature, size = JarStruct.get_data('uint32', data)
        if self.signature != self.MAGIC:
            raise JarReaderError('Bad magic')
        offset = size
        # Sizes for variable-length fields, filled in as the corresponding
        # size fields are read.
        sizes = dict((t, 0) for t in self.size_fields)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                value, size = JarStruct.get_data(t, data[offset:])
            else:
                size = sizes[t]
                value = data[offset:offset + size]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if name not in sizes:
                self._values[name] = value
            else:
                # Size fields are not stored; remember the size for the
                # variable-length field they describe.
                sizes[name] = value
            offset += size

    def _init_empty(self):
        '''
        Initialize the structure with default values: 0 for fixed-size
        fields, an empty byte string for variable-length fields.
        '''
        self.signature = self.MAGIC
        for name, t in self.STRUCT.items():
            if name in self.size_fields:
                continue
            # b'' (identical to '' on py2) keeps serialize() working on
            # py3, where bytes and str can't be concatenated.
            self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else b''

    @staticmethod
    def get_data(type, data):
        '''
        Deserialize a single field of the given fixed-size type from the
        beginning of the given buffer. Return a (value, size) tuple, where
        size is the number of bytes consumed.
        '''
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        format, size = JarStruct.TYPE_MAPPING[type]
        data = data[:size]
        if isinstance(data, memoryview):
            data = data.tobytes()
        return struct.unpack('<' + format, data)[0], size

    def serialize(self):
        '''
        Serialize the structure: magic number first, then each field in
        STRUCT order, with size fields recomputed from the current length
        of the variable-length field they describe.
        '''
        serialized = struct.pack('<I', self.signature)
        # Map each size-field name to the variable-length field it measures.
        sizes = dict((t, name) for name, t in self.STRUCT.items()
                     if t not in JarStruct.TYPE_MAPPING)
        for name, t in self.STRUCT.items():
            if t in JarStruct.TYPE_MAPPING:
                format, size = JarStruct.TYPE_MAPPING[t]
                if name in sizes:
                    value = len(self[sizes[name]])
                else:
                    value = self[name]
                serialized += struct.pack('<' + format, value)
            else:
                serialized += self[name]
        return serialized

    @property
    def size(self):
        '''
        Size of the serialized structure in bytes: magic number plus
        fixed-size fields plus the current length of variable-length
        fields.
        '''
        size = JarStruct.TYPE_MAPPING['uint32'][1]
        for name, type in self.STRUCT.items():
            if type in JarStruct.TYPE_MAPPING:
                size += JarStruct.TYPE_MAPPING[type][1]
            else:
                size += len(self[name])
        return size

    def __getitem__(self, key):
        return self._values[key]

    def __setitem__(self, key, value):
        if key not in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            # Size fields are derived at serialization time; refuse writes.
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        return key in self._values

    def __iter__(self):
        return iter(self._values.items())

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            ' '.join('%s=%s' % (n, v) for n, v in self))
class JarCdirEnd(JarStruct):
    '''
    End of central directory record.
    '''
    MAGIC = 0x06054b50
    STRUCT = OrderedDict([
        ('disk_num', 'uint16'),
        ('cdir_disk', 'uint16'),
        ('disk_entries', 'uint16'),
        ('cdir_entries', 'uint16'),
        ('cdir_size', 'uint32'),
        ('cdir_offset', 'uint32'),
        ('comment_size', 'uint16'),
        # Variable-length comment; its length is held by comment_size.
        ('comment', 'comment_size'),
    ])
# Size of an empty end-of-central-directory record, used when scanning
# backwards for the record from the end of a jar file.
CDIR_END_SIZE = JarCdirEnd().size
class JarCdirEntry(JarStruct):
    '''
    Central directory file header.
    '''
    MAGIC = 0x02014b50
    STRUCT = OrderedDict([
        ('creator_version', 'uint16'),
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extrafield_size', 'uint16'),
        ('filecomment_size', 'uint16'),
        ('disknum', 'uint16'),
        ('internal_attr', 'uint16'),
        ('external_attr', 'uint32'),
        ('offset', 'uint32'),
        # Variable-length fields, sized by the *_size fields above.
        ('filename', 'filename_size'),
        ('extrafield', 'extrafield_size'),
        ('filecomment', 'filecomment_size'),
    ])
class JarLocalFileHeader(JarStruct):
    '''
    Local file header.
    '''
    MAGIC = 0x04034b50
    STRUCT = OrderedDict([
        ('min_version', 'uint16'),
        ('general_flag', 'uint16'),
        ('compression', 'uint16'),
        ('lastmod_time', 'uint16'),
        ('lastmod_date', 'uint16'),
        ('crc32', 'uint32'),
        ('compressed_size', 'uint32'),
        ('uncompressed_size', 'uint32'),
        ('filename_size', 'uint16'),
        ('extra_field_size', 'uint16'),
        # Variable-length fields, sized by the *_size fields above.
        ('filename', 'filename_size'),
        ('extra_field', 'extra_field_size'),
    ])
class JarFileReader(object):
    '''
    File-like class used by JarReader to give access to individual archive
    members.
    '''
    def __init__(self, header, data):
        '''
        Initialize a JarFileReader from a local file header and a buffer
        containing the member's (possibly compressed) raw data.
        '''
        assert header['compression'] in [JAR_DEFLATED, JAR_STORED, JAR_BROTLI]
        self._data = data
        # Copy the useful header fields onto the reader itself.
        for name in ['filename', 'compressed_size',
                     'uncompressed_size', 'crc32']:
            setattr(self, name, header[name])
        self.compressed = header['compression'] != JAR_STORED
        self.compress = header['compression']

    def read(self, length=-1):
        '''Read at most length bytes of uncompressed data.'''
        return self.uncompressed_data.read(length)

    def readlines(self):
        '''Return all lines of uncompressed data, keeping line endings.'''
        return self.read().splitlines(True)

    def __iter__(self):
        '''Iterate over the lines of uncompressed data.'''
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        '''Change the current position in the uncompressed data.'''
        return self.uncompressed_data.seek(pos, whence)

    def close(self):
        '''Close the uncompressed data stream.'''
        self.uncompressed_data.close()

    @property
    def compressed_data(self):
        '''Raw member data, compressed or not, as stored in the archive.'''
        return self._data[:self.compressed_size]

    @property
    def uncompressed_data(self):
        '''A BytesIO over the uncompressed data, created on first access.'''
        existing = getattr(self, '_uncompressed_data', None)
        if existing is not None:
            return existing
        raw = self.compressed_data
        if self.compress == JAR_STORED:
            buf = raw.tobytes()
        elif self.compress == JAR_BROTLI:
            buf = Brotli.decompress(raw.tobytes())
        elif self.compress == JAR_DEFLATED:
            buf = zlib.decompress(raw.tobytes(), -MAX_WBITS)
        else:
            # Unreachable given the assertion in __init__.
            assert False
        if len(buf) != self.uncompressed_size:
            raise JarReaderError('Corrupted file? %s' % self.filename)
        self._uncompressed_data = BytesIO(buf)
        return self._uncompressed_data
class JarReader(object):
    '''
    Class with methods to read jar files. Can be initialized with a file
    path, a file-like object, or raw data.
    '''
    def __init__(self, file=None, fileobj=None, data=None):
        '''
        Open the given file name (or read the given file-like object, or
        use the given raw data) as a jar archive.

        Raises JarReaderError when no end-of-central-directory record is
        found.
        '''
        if fileobj:
            data = fileobj.read()
        elif file:
            data = open(file, 'rb').read()
        self._data = memoryview(data)
        # The end-of-central-directory record ends with a variable-length
        # comment, so scan backwards from the smallest possible offset
        # until its magic number is found.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data('uint32', self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -len(self._data):
                raise JarReaderError('Not a jar?')
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        '''Free the archive data buffer.'''
        del self._data

    @property
    def compression(self):
        '''
        Archive-wide compression indicator: the maximum compression value
        across all entries, or JAR_STORED for an empty archive.
        '''
        entries = self.entries
        if not entries:
            return JAR_STORED
        # NOTE: .values()/range() replace the py2-only .itervalues()/
        # xrange() so this class runs on both py2 and py3.
        return max(f['compression'] for f in entries.values())

    @property
    def entries(self):
        '''
        An OrderedDict mapping member names to their central directory
        entries, computed lazily. Directory entries are skipped.
        '''
        if hasattr(self, '_entries'):
            return self._entries
        preload = 0
        if self.is_optimized:
            # Optimized jars start with a uint32 preload-size marker.
            preload = JarStruct.get_data('uint32', self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end['cdir_offset']
        for e in range(self._cdir_end['cdir_entries']):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # The creator host system lives in the high byte of
            # creator_version; 0 and 3 are the values this reader knows
            # how to check for directory entries.
            host = entry['creator_version'] >> 8
            xattr = entry['external_attr']
            # Skip directories: attribute bit 0x10 for host 0, or the
            # directory bit (0o040000) in the high 16 bits of
            # external_attr for host 3 (matching what JarWriter stores
            # there for unix modes).
            if (host == 0 and xattr & 0x10) or (host == 3 and
                                                xattr & (0o040000 << 16)):
                continue
            entries[entry['filename']] = entry
            if entry['offset'] < preload:
                self._last_preloaded = entry['filename']
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        '''
        Whether the archive is optimized: its central directory starts
        right after the leading uint32 preload marker.
        '''
        return self._cdir_end['cdir_offset'] == \
            JarStruct.TYPE_MAPPING['uint32'][1]

    @property
    def last_preloaded(self):
        '''Name of the last entry in the preload region, or None.'''
        if hasattr(self, '_last_preloaded'):
            return self._last_preloaded
        self._last_preloaded = None
        # Computing entries fills in _last_preloaded as a side effect.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        '''
        Return a JarFileReader for the given central directory entry,
        after checking that the local file header agrees with it.
        '''
        header = JarLocalFileHeader(self._data[entry['offset']:])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError('Central directory and file header ' +
                                     'mismatch. Corrupted archive?')
        return JarFileReader(header,
                             self._data[entry['offset'] + header.size:])

    def __iter__(self):
        '''Iterate over JarFileReaders for all members.'''
        for entry in self.entries.values():
            yield self._getreader(entry)

    def __getitem__(self, name):
        '''Return a JarFileReader for the named member.'''
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        '''Whether the archive contains the named member.'''
        return name in self.entries
class JarWriter(object):
    '''
    Helper to write jar archives. Members are added with add() and the
    archive is only written to the underlying file when finish() is called
    (or on exit when used as a context manager).
    '''
    def __init__(self, file=None, fileobj=None, compress=True, compress_level=9):
        '''
        Initialize a jar writer on the given file name, or the given
        file-like object. compress is True (deflate), False (store) or one
        of the JAR_* constants, and is the default for members added
        without an explicit compression.
        '''
        if fileobj:
            self._data = fileobj
        else:
            self._data = open(file, 'wb')
        if compress is True:
            compress = JAR_DEFLATED
        self._compress = compress
        self._compress_level = compress_level
        self._contents = OrderedDict()
        self._last_preloaded = None

    def __enter__(self):
        '''Context manager entry point.'''
        return self

    def __exit__(self, type, value, tb):
        '''Context manager exit point: write out the archive.'''
        self.finish()

    def finish(self):
        '''
        Write the archive and close the underlying file.

        Regular layout:
            (local file entries) (central directory) (end of central dir)
        With preloaded members, the central directory is moved up front and
        a uint32 preload-size marker is prepended:
            (preload size) (central directory) (end of cdir) (entries)
        '''
        # First pass: compute each entry's offset and local file header,
        # and find where the preloaded members end.
        offset = 0
        headers = {}
        preload_size = 0
        # NOTE: py2-only itervalues()/reduce() were replaced with
        # values()/sum() so this also runs on py3.
        for entry, content in self._contents.values():
            header = JarLocalFileHeader()
            for name in entry.STRUCT:
                if name in header:
                    header[name] = entry[name]
            entry['offset'] = offset
            offset += len(content) + header.size
            if entry['filename'] == self._last_preloaded:
                preload_size = offset
            headers[entry] = header
        end = JarCdirEnd()
        end['disk_entries'] = len(self._contents)
        end['cdir_entries'] = end['disk_entries']
        end['cdir_size'] = sum(entry.size
                               for entry, _ in self._contents.values())
        if preload_size:
            # Optimized jar: the marker and central directory come first,
            # so shift every entry offset past them.
            end['cdir_offset'] = 4
            offset = end['cdir_size'] + end['cdir_offset'] + end.size
            preload_size += offset
            self._data.write(struct.pack('<I', preload_size))
            for entry, _ in self._contents.values():
                entry['offset'] += offset
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        for entry, content in self._contents.values():
            self._data.write(headers[entry].serialize())
            self._data.write(content)
        if not preload_size:
            end['cdir_offset'] = offset
            for entry, _ in self._contents.values():
                self._data.write(entry.serialize())
            self._data.write(end.serialize())
        # NOTE(review): this also closes a caller-provided fileobj.
        self._data.close()

    def add(self, name, data, compress=None, mode=None, skip_duplicates=False):
        '''
        Add a new member to the jar archive.
            name: member name in the archive (separators normalized).
            data: str/bytes, a file-like object, or an already-compressed
                Deflater/JarFileReader.
            compress: compression for this member; the writer's default
                when None.
            mode: unix permissions to record for the member, if any.
            skip_duplicates: silently ignore an already-added name instead
                of raising.

        Raises JarWriterError on duplicate names or unsupported data types.
        '''
        name = mozpath.normsep(name)
        if name in self._contents and not skip_duplicates:
            raise JarWriterError("File %s already in JarWriter" % name)
        if compress is None:
            compress = self._compress
        if compress is True:
            compress = JAR_DEFLATED
        if compress is False:
            compress = JAR_STORED
        if (isinstance(data, (JarFileReader, Deflater)) and
                data.compress == compress):
            # Reuse already-compressed data when its compression matches.
            deflater = data
        else:
            deflater = Deflater(compress, compress_level=self._compress_level)
            # (str, bytes) replaces the py2-only basestring. NOTE(review):
            # py2 unicode data is no longer accepted here — confirm no
            # caller relies on passing unicode.
            if isinstance(data, (str, bytes)):
                deflater.write(data)
            elif hasattr(data, 'read'):
                if hasattr(data, 'seek'):
                    data.seek(0)
                deflater.write(data.read())
            else:
                raise JarWriterError("Don't know how to handle %s" %
                                     type(data))
        entry = JarCdirEntry()
        entry['creator_version'] = 20  # creator version 2.0
        if mode is not None:
            # Set the creator host system byte to 3 (matching what the
            # reader treats as unix) and store the mode in the high 16
            # bits of external_attr.
            entry['creator_version'] |= 3 << 8
            entry['external_attr'] = (mode & 0xFFFF) << 16
        if deflater.compressed:
            entry['min_version'] = 20  # 2.0 needed to extract
            entry['general_flag'] = 2
            entry['compression'] = deflater.compress
        else:
            entry['min_version'] = 10  # 1.0 for stored entries
            entry['general_flag'] = 0
            entry['compression'] = JAR_STORED
        # Fixed timestamp (2010-01-01 00:00) for reproducible output.
        entry['lastmod_date'] = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry['lastmod_time'] = 0
        entry['crc32'] = deflater.crc32
        entry['compressed_size'] = deflater.compressed_size
        entry['uncompressed_size'] = deflater.uncompressed_size
        entry['filename'] = name
        self._contents[name] = entry, deflater.compressed_data

    def preload(self, files):
        '''
        Move the given member names to the front of the archive, in the
        given order, and mark the last of them as the preload boundary;
        finish() then writes an optimized jar. Names not present in the
        archive are ignored.
        '''
        new_contents = OrderedDict()
        for f in files:
            if f not in self._contents:
                continue
            new_contents[f] = self._contents[f]
            self._last_preloaded = f
        for f in self._contents:
            if f not in new_contents:
                new_contents[f] = self._contents[f]
        self._contents = new_contents
class Deflater(object):
    '''
    File-like object that compresses data written to it on the fly, while
    also keeping the raw data, its CRC, and both sizes available.
    '''
    def __init__(self, compress=True, compress_level=9):
        '''
        Initialize a Deflater. compress is True (deflate), False (store)
        or one of the JAR_* compression constants.
        '''
        self._data = BytesIO()
        if compress is True:
            compress = JAR_DEFLATED
        elif compress is False:
            compress = JAR_STORED
        self.compress = compress
        if compress == JAR_DEFLATED:
            self._deflater = zlib.compressobj(compress_level, zlib.DEFLATED,
                                              -MAX_WBITS)
            self._deflated = BytesIO()
        elif compress == JAR_BROTLI:
            self._deflater = BrotliCompress()
            self._deflated = BytesIO()
        else:
            assert compress == JAR_STORED
            self._deflater = None
        self.crc32 = 0

    def write(self, data):
        '''Append data, updating the CRC and the compressed stream.'''
        self._data.write(data)
        if isinstance(data, memoryview):
            data = data.tobytes()
        if self.compress:
            if not self._deflater:
                raise JarWriterError("Can't write after flush")
            self._deflated.write(self._deflater.compress(data))
        self.crc32 = zlib.crc32(data, self.crc32) & 0xffffffff

    def close(self):
        '''Close the underlying buffers.'''
        self._data.close()
        if self.compress:
            self._deflated.close()

    def _flush(self):
        # Finalize the compressor, exactly once; later writes will raise.
        if self.compress and self._deflater:
            self._deflated.write(self._deflater.flush())
            self._deflater = None

    @property
    def compressed(self):
        '''Whether compression actually shrank the data.'''
        return self._compressed_size < self.uncompressed_size

    @property
    def _compressed_size(self):
        '''
        Size of the compressed stream, whether or not it is smaller than
        the raw data. Flushes the compressor as a side effect.
        '''
        if not self.compress:
            return self.uncompressed_size
        self._flush()
        return self._deflated.tell()

    @property
    def compressed_size(self):
        '''Size of the data to store: compressed only if that helps.'''
        return (self._compressed_size if self.compressed
                else self.uncompressed_size)

    @property
    def uncompressed_size(self):
        '''Number of raw bytes written so far.'''
        return self._data.tell()

    @property
    def compressed_data(self):
        '''The bytes to store in the archive.'''
        return (self._deflated.getvalue() if self.compressed
                else self._data.getvalue())
class Brotli(object):
    '''
    Helper to (de)compress data with the external brotli tool from the
    local build.
    '''
    @staticmethod
    @memoize
    def brotli_tool():
        '''Path to the brotli executable in the local build, memoized.'''
        from buildconfig import topobjdir, substs
        return os.path.join(topobjdir, 'dist', 'host', 'bin',
                            'bro' + substs.get('BIN_SUFFIX', ''))

    @staticmethod
    def run_brotli_tool(args, input):
        '''
        Run the brotli tool with the given arguments, feeding it the given
        input on stdin, and return its stdout output.
        '''
        tool = subprocess.Popen([Brotli.brotli_tool()] + args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        out, _ = tool.communicate(input)
        if tool.wait() != 0:
            raise Exception("Brotli compression failed")
        return out

    @staticmethod
    def compress(data):
        '''Compress the given data with brotli.'''
        return Brotli.run_brotli_tool(['--window', '17'], data)

    @staticmethod
    def decompress(data):
        '''Decompress the given brotli-compressed data.'''
        return Brotli.run_brotli_tool(['--decompress'], data)
class BrotliCompress(object):
    '''
    Compressor object with the same compress()/flush() interface as
    zlib.compressobj. The external brotli tool can't compress
    incrementally, so writes are accumulated and compressed at flush time.
    '''
    def __init__(self):
        self._pending = BytesIO()

    def compress(self, data):
        # Just accumulate; the actual compression happens in flush().
        self._pending.write(data)
        return b''

    def flush(self):
        '''Compress everything accumulated so far and return the result.'''
        return Brotli.compress(self._pending.getvalue())
class JarLog(dict):
    '''
    Helper to read a jar access log: a text file where each line contains
    a jar identifier and a member path separated by whitespace. Maps each
    jar to the list of paths accessed from it, in first-access order and
    without duplicates.
    '''
    def __init__(self, file=None, fileobj=None):
        '''
        Initialize the JarLog from the given file name, or from the given
        file-like object when one is provided.
        '''
        if not fileobj:
            fileobj = open(file, 'r')
        for line in fileobj:
            parts = line.strip().split(None, 1)
            # Skip blank or malformed lines. The previous code unpacked
            # the split unconditionally, raising ValueError on lines that
            # didn't contain both a jar and a path.
            if len(parts) != 2:
                continue
            jar, path = parts
            entry = self.setdefault(jar, [])
            if path not in entry:
                entry.append(path)