use alloc::{string::String, vec::Vec};
use bytes::{BufMut, BytesMut};
use crc32fast::Hasher as Crc32Hasher;
// Record signatures. Each is emitted with `put_u32_le`, so the on-wire bytes
// are the value's little-endian encoding (all begin with "PK").
/// Signature written at the start of every local file header.
const SIG_LOCAL: u32 = 0x0403_4b50;
/// Signature written at the start of every data descriptor.
const SIG_DATA_DESC: u32 = 0x0807_4b50;
/// Signature written at the start of every central directory entry.
const SIG_CENTRAL: u32 = 0x0201_4b50;
/// Signature of the ZIP64 end-of-central-directory record.
const SIG_ZIP64_EOCD: u32 = 0x0606_4b50;
/// Signature of the ZIP64 end-of-central-directory locator.
const SIG_ZIP64_EOCD_LOC: u32 = 0x0706_4b50;
/// Signature of the classic end-of-central-directory record.
const SIG_EOCD: u32 = 0x0605_4b50;
/// Header ID of the ZIP64 extra field appended to central directory entries.
const TAG_ZIP64: u16 = 0x0001;
/// Value stored in every "version needed to extract" field.
// NOTE(review): 45 presumably stands for spec version 4.5 (ZIP64) — confirm against APPNOTE.
const VERSION_NEEDED: u16 = 45;
/// Value stored in every "version made by" field: high byte 3, low byte 0x2D (45).
// NOTE(review): high byte 3 is presumably the "Unix" host id — confirm against APPNOTE.
const VERSION_MADE_BY: u16 = (3 << 8) | 0x2D;
/// A timestamp packed into the two 16-bit MS-DOS words stored in ZIP headers.
///
/// The [`Default`] value has both words set to zero.
#[derive(Clone, Copy, Debug, Default)]
pub struct MsDosDateTime {
    /// `hour << 11 | minute << 5 | second / 2`.
    time: u16,
    /// `(year - 1980) << 9 | month << 5 | day`.
    date: u16,
}

impl MsDosDateTime {
    /// First year the 7-bit year field can express.
    const MIN_YEAR: u16 = 1980;
    /// Last year the 7-bit year field can express (`1980 + 127`).
    const MAX_YEAR: u16 = 2107;

    /// Packs calendar components into the MS-DOS date/time words.
    ///
    /// The format can only express 1980-01-01 00:00:00 through
    /// 2107-12-31 23:59:58 at two-second resolution. Input outside that window
    /// saturates instead of overflowing:
    ///
    /// * a `year` before 1980 yields 1980-01-01 00:00:00;
    /// * a `year` after 2107 yields 2107-12-31 23:59:58;
    /// * otherwise each component is clamped to its calendar range
    ///   (`month` 1..=12, `day` 1..=31, `hour` 0..=23, `minute`/`second` 0..=59)
    ///   so that it can never spill into a neighbouring bit field.
    ///
    /// In-range input produces exactly the same words as a plain shift-and-or.
    #[must_use]
    pub const fn new(year: u16, month: u16, day: u16, hour: u16, minute: u16, second: u16) -> Self {
        let (year, month, day, hour, minute, second) = if year < Self::MIN_YEAR {
            // Subtracting 1980 from an earlier year would underflow the u16.
            (Self::MIN_YEAR, 1, 1, 0, 0, 0)
        } else if year > Self::MAX_YEAR {
            (Self::MAX_YEAR, 12, 31, 23, 59, 59)
        } else {
            (
                year,
                Self::clamp(month, 1, 12),
                Self::clamp(day, 1, 31),
                Self::clamp(hour, 0, 23),
                Self::clamp(minute, 0, 59),
                Self::clamp(second, 0, 59),
            )
        };
        Self {
            time: (hour << 11) | (minute << 5) | (second / 2),
            date: ((year - Self::MIN_YEAR) << 9) | (month << 5) | day,
        }
    }

    /// `const`-compatible stand-in for `Ord::clamp`.
    const fn clamp(value: u16, lo: u16, hi: u16) -> u16 {
        if value < lo {
            lo
        } else if value > hi {
            hi
        } else {
            value
        }
    }
}
#[cfg(feature = "jiff")]
impl From<jiff::civil::DateTime> for MsDosDateTime {
    /// Converts a jiff civil datetime into the packed MS-DOS representation.
    ///
    /// The MS-DOS format only covers the years 1980 through 2107, while jiff's
    /// civil years span a far wider range and may be negative. Timestamps
    /// before 1980 therefore become 1980-01-01 00:00:00 and timestamps after
    /// 2107 become 2107-12-31 23:59:58. Without this check the sign-losing cast
    /// would wrap and the year subtraction could underflow.
    #[expect(
        clippy::cast_sign_loss,
        reason = "the year is range-checked first; jiff's other civil components are non-negative"
    )]
    fn from(dt: jiff::civil::DateTime) -> Self {
        let year = dt.year();
        if year < 1980 {
            return Self::new(1980, 1, 1, 0, 0, 0);
        }
        if year > 2107 {
            return Self::new(2107, 12, 31, 23, 59, 59);
        }
        Self::new(
            year as u16,
            dt.month() as u16,
            dt.day() as u16,
            dt.hour() as u16,
            dt.minute() as u16,
            dt.second() as u16,
        )
    }
}
/// Compression method identifier recorded for an entry.
///
/// The discriminant is the `u16` written into the "compression method" field
/// of the local header and the central directory entry. This type only labels
/// the entry; no compression is performed by the encoder in this file.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
#[repr(u16)]
pub enum CompressionMethod {
/// No compression (method id 0); this is the default.
#[default]
Stored = 0,
/// Method id 8.
Deflate = 8,
/// Method id 12.
Bzip2 = 12,
/// Method id 14.
Lzma = 14,
/// Method id 93.
Zstd = 93,
}
/// Everything needed to emit one central directory entry once the archive is finished.
struct CdEntry {
/// Entry name as written into the headers; a trailing `/` marks a directory.
path: String,
/// Packed modification timestamp.
modified: MsDosDateTime,
/// Compression method id recorded for the entry.
method: CompressionMethod,
/// CRC-32 recorded for the entry (0 for directories).
crc32: u32,
/// Payload size as stored in the archive, in bytes.
compressed_size: u64,
/// Payload size before compression, in bytes.
uncompressed_size: u64,
/// Archive offset at which the entry's local header begins.
local_offset: u64,
}
/// State of the file entry that has been started but not yet ended.
struct ActiveFile {
/// Entry name, moved into the central directory record when the file ends.
path: String,
/// Packed modification timestamp.
modified: MsDosDateTime,
/// Compression method declared when the file was started.
method: CompressionMethod,
/// Running total of bytes passed to `file_data`.
uncompressed_size: u64,
/// Archive offset at which this entry's local header begins.
local_offset: u64,
/// Running CRC-32 over the bytes passed to `file_data`.
crc: Crc32Hasher,
}
/// Incremental ZIP64 archive encoder.
///
/// The encoder only appends header/trailer bytes to caller-supplied buffers
/// and keeps track of offsets; it performs no I/O of its own.
pub struct ZipArchive {
/// Entries recorded so far, in the order they were added.
cd: Vec<CdEntry>,
/// Number of archive bytes accounted for so far.
offset: u64,
/// The file currently being written, if any.
active: Option<ActiveFile>,
}
impl ZipArchive {
#[must_use]
pub const fn new() -> Self {
Self {
cd: Vec::new(),
offset: 0,
active: None,
}
}
pub fn start_file(
&mut self,
path: String,
modified: MsDosDateTime,
method: CompressionMethod,
buf: &mut BytesMut,
) {
assert!(self.active.is_none(), "previous file not ended");
let local_offset = self.offset;
let before = buf.len();
encode_local_header(&path, modified, method, buf);
self.offset += (buf.len() - before) as u64;
self.active = Some(ActiveFile {
path,
modified,
method,
uncompressed_size: 0,
local_offset,
crc: Crc32Hasher::new(),
});
}
pub fn file_data(&mut self, data: &[u8]) {
let active = self.active.as_mut().expect("no active file");
active.crc.update(data);
active.uncompressed_size += data.len() as u64;
if active.method == CompressionMethod::Stored {
self.offset += data.len() as u64;
}
}
pub fn end_file(&mut self, buf: &mut BytesMut) {
let active = self.active.take().expect("no active file");
let crc32 = active.crc.finalize();
let size = active.uncompressed_size;
let before = buf.len();
encode_data_descriptor(crc32, size, size, buf);
self.offset += (buf.len() - before) as u64;
self.cd.push(CdEntry {
path: active.path,
modified: active.modified,
method: active.method,
crc32,
compressed_size: size,
uncompressed_size: size,
local_offset: active.local_offset,
});
}
pub fn end_file_compressed(&mut self, compressed_size: u64, buf: &mut BytesMut) {
let active = self.active.take().expect("no active file");
let crc32 = active.crc.finalize();
self.offset += compressed_size;
let before = buf.len();
encode_data_descriptor(crc32, compressed_size, active.uncompressed_size, buf);
self.offset += (buf.len() - before) as u64;
self.cd.push(CdEntry {
path: active.path,
modified: active.modified,
method: active.method,
crc32,
compressed_size,
uncompressed_size: active.uncompressed_size,
local_offset: active.local_offset,
});
}
pub fn add_directory(&mut self, path: String, modified: MsDosDateTime, buf: &mut BytesMut) {
assert!(self.active.is_none(), "previous file not ended");
let local_offset = self.offset;
let before = buf.len();
encode_local_header(&path, modified, CompressionMethod::Stored, buf);
encode_data_descriptor(0, 0, 0, buf);
self.offset += (buf.len() - before) as u64;
self.cd.push(CdEntry {
path,
modified,
method: CompressionMethod::Stored,
crc32: 0,
compressed_size: 0,
uncompressed_size: 0,
local_offset,
});
}
pub fn finish(&mut self, buf: &mut BytesMut) {
assert!(self.active.is_none(), "file not ended before finish");
let cd_start = self.offset;
let before = buf.len();
for e in &self.cd {
encode_cd_entry(e, buf);
}
let cd_size = (buf.len() - before) as u64;
self.offset += cd_size;
let zip64_eocd_offset = self.offset;
encode_zip64_eocd(self.cd.len() as u64, cd_size, cd_start, buf);
encode_zip64_eocd_locator(zip64_eocd_offset, buf);
encode_eocd(buf);
}
}
impl Default for ZipArchive {
fn default() -> Self {
Self::new()
}
}
#[expect(
clippy::cast_possible_truncation,
reason = "file names > 64 KiB are unsupported"
)]
fn encode_local_header(
path: &str,
modified: MsDosDateTime,
method: CompressionMethod,
b: &mut BytesMut,
) {
let name = path.as_bytes();
b.reserve(30 + name.len());
b.put_u32_le(SIG_LOCAL);
b.put_u16_le(VERSION_NEEDED);
b.put_u16_le(0x0808); b.put_u16_le(method as u16);
b.put_u16_le(modified.time);
b.put_u16_le(modified.date);
b.put_u32_le(0); b.put_u32_le(0); b.put_u32_le(0); b.put_u16_le(name.len() as u16);
b.put_u16_le(0); b.put_slice(name);
}
/// Appends a data descriptor to `b`: signature, CRC-32, then the compressed
/// and uncompressed sizes as 64-bit little-endian values.
fn encode_data_descriptor(
    crc32: u32,
    compressed_size: u64,
    uncompressed_size: u64,
    b: &mut BytesMut,
) {
    /// signature + crc32 + two 8-byte sizes
    const LEN: usize = 4 + 4 + 8 + 8;

    b.reserve(LEN);
    b.put_u32_le(SIG_DATA_DESC);
    b.put_u32_le(crc32);
    for size in [compressed_size, uncompressed_size] {
        b.put_u64_le(size);
    }
}
/// Length of the extra field attached to every central directory entry:
/// a 4-byte tag/size prefix plus three 8-byte values.
const CD_EXTRA_LEN: u16 = 28;

/// Appends the central directory entry for `e` to `b`.
///
/// The 32-bit size and offset fields always hold the `0xFFFF_FFFF` sentinel;
/// the real values are carried in the trailing ZIP64 extra field. Paths ending
/// in `/` get mode `0o40755` in the high half of the external attributes, all
/// other paths get `0o100644`.
#[expect(
    clippy::cast_possible_truncation,
    reason = "file names > 64 KiB are unsupported"
)]
fn encode_cd_entry(e: &CdEntry, b: &mut BytesMut) {
    /// Size of the entry without the name and extra field.
    const FIXED_LEN: usize = 46;
    /// Placeholder meaning "see the ZIP64 extra field".
    const SEE_ZIP64: u32 = 0xFFFF_FFFF;

    let name_bytes = e.path.as_bytes();
    let mode: u32 = if e.path.ends_with('/') {
        0o40_755
    } else {
        0o100_644
    };
    let external_attr = mode << 16;

    b.reserve(FIXED_LEN + name_bytes.len() + usize::from(CD_EXTRA_LEN));

    // Fixed-size portion.
    b.put_u32_le(SIG_CENTRAL);
    b.put_u16_le(VERSION_MADE_BY);
    b.put_u16_le(VERSION_NEEDED);
    // general-purpose flags: bit 3 | bit 11
    b.put_u16_le(0x0808);
    b.put_u16_le(e.method as u16);
    b.put_u16_le(e.modified.time);
    b.put_u16_le(e.modified.date);
    b.put_u32_le(e.crc32);
    // compressed size
    b.put_u32_le(SEE_ZIP64);
    // uncompressed size
    b.put_u32_le(SEE_ZIP64);
    b.put_u16_le(name_bytes.len() as u16);
    b.put_u16_le(CD_EXTRA_LEN);
    // comment length, disk number start, internal attributes
    for _ in 0..3 {
        b.put_u16_le(0);
    }
    b.put_u32_le(external_attr);
    // local header offset
    b.put_u32_le(SEE_ZIP64);

    // Variable-size portion: name, then the ZIP64 extra field.
    b.put_slice(name_bytes);
    b.put_u16_le(TAG_ZIP64);
    // payload length of the extra field (three u64 values)
    b.put_u16_le(CD_EXTRA_LEN - 4);
    for value in [e.uncompressed_size, e.compressed_size, e.local_offset] {
        b.put_u64_le(value);
    }
}
/// Appends the 56-byte ZIP64 end-of-central-directory record to `b`.
///
/// `num_entries` is written twice (entries on this disk and total entries),
/// followed by the central directory's size and starting offset.
fn encode_zip64_eocd(num_entries: u64, cd_size: u64, cd_offset: u64, b: &mut BytesMut) {
    /// Total bytes appended by this function.
    const RECORD_LEN: usize = 56;
    /// Value of the record's own size field: the bytes that follow the
    /// signature and the size field itself.
    const REMAINING_LEN: u64 = 44;

    b.reserve(RECORD_LEN);
    b.put_u32_le(SIG_ZIP64_EOCD);
    b.put_u64_le(REMAINING_LEN);
    b.put_u16_le(VERSION_MADE_BY);
    b.put_u16_le(VERSION_NEEDED);
    // two 32-bit disk-number fields, both zero
    for _ in 0..2 {
        b.put_u32_le(0);
    }
    for value in [num_entries, num_entries, cd_size, cd_offset] {
        b.put_u64_le(value);
    }
}
/// Appends the 20-byte ZIP64 end-of-central-directory locator to `b`,
/// pointing at the record located at `zip64_eocd_offset`.
fn encode_zip64_eocd_locator(zip64_eocd_offset: u64, b: &mut BytesMut) {
    /// signature + u32 + u64 + u32
    const LOCATOR_LEN: usize = 20;
    /// First 32-bit field after the signature.
    const FIRST_FIELD: u32 = 0;
    /// Final 32-bit field of the locator.
    const LAST_FIELD: u32 = 1;

    b.reserve(LOCATOR_LEN);
    b.put_u32_le(SIG_ZIP64_EOCD_LOC);
    b.put_u32_le(FIRST_FIELD);
    b.put_u64_le(zip64_eocd_offset);
    b.put_u32_le(LAST_FIELD);
}
/// Appends the 22-byte classic end-of-central-directory record to `b`.
///
/// Every numeric field is filled with its all-ones value and the trailing
/// 16-bit field is zero; the actual counts and offsets are the ones written
/// in the ZIP64 record.
fn encode_eocd(b: &mut BytesMut) {
    /// signature + four u16 + two u32 + one u16
    const RECORD_LEN: usize = 22;

    b.reserve(RECORD_LEN);
    b.put_u32_le(SIG_EOCD);
    // four 16-bit fields, all 0xFFFF
    for _ in 0..4 {
        b.put_u16_le(u16::MAX);
    }
    // two 32-bit fields, both 0xFFFF_FFFF
    for _ in 0..2 {
        b.put_u32_le(u32::MAX);
    }
    // trailing 16-bit length field
    b.put_u16_le(0);
}
// Structural tests: each test drives `ZipArchive` the way a streaming caller
// would, collects every emitted byte into a `Vec<u8>`, and then checks fields
// at fixed byte offsets within the produced records.
#[cfg(test)]
#[expect(clippy::cast_possible_truncation, reason = "test data is small")]
mod tests {
use super::*;
// Little-endian field readers at byte offset `off`.
fn u16le(b: &[u8], off: usize) -> u16 {
u16::from_le_bytes(b[off..off + 2].try_into().unwrap())
}
fn u32le(b: &[u8], off: usize) -> u32 {
u32::from_le_bytes(b[off..off + 4].try_into().unwrap())
}
fn u64le(b: &[u8], off: usize) -> u64 {
u64::from_le_bytes(b[off..off + 8].try_into().unwrap())
}
// Runs `f` against a fresh archive and returns all bytes it pushed into `out`.
fn collect_archive(f: impl FnOnce(&mut ZipArchive, &mut Vec<u8>)) -> Vec<u8> {
let mut archive = ZipArchive::new();
let mut out = Vec::new();
f(&mut archive, &mut out);
out
}
// Moves the contents of `buf` to the end of `out` and empties `buf` for reuse.
fn emit(buf: &mut BytesMut, out: &mut Vec<u8>) {
out.extend_from_slice(buf);
buf.clear();
}
// An archive with no entries consists only of the three trailer records:
// 56 (zip64 eocd) + 20 (locator) + 22 (eocd) = 98 bytes.
#[test]
fn empty_zip_structure() {
let zip = collect_archive(|archive, out| {
let mut buf = BytesMut::new();
archive.finish(&mut buf);
emit(&mut buf, out);
});
assert_eq!(zip.len(), 98);
assert_eq!(u32le(&zip, 0), SIG_ZIP64_EOCD, "zip64 eocd sig");
assert_eq!(u64le(&zip, 24), 0u64, "entries on disk");
assert_eq!(u64le(&zip, 32), 0u64, "total entries");
assert_eq!(u64le(&zip, 40), 0u64, "cd size");
assert_eq!(u64le(&zip, 48), 0u64, "cd offset");
assert_eq!(u32le(&zip, 56), SIG_ZIP64_EOCD_LOC, "locator sig");
assert_eq!(u64le(&zip, 56 + 8), 0u64, "zip64 eocd at offset 0");
assert_eq!(u32le(&zip, 76), SIG_EOCD, "std eocd sig");
assert_eq!(zip.len(), 98);
}
// Walks a one-file stored archive front to back: local header, payload,
// data descriptor, central directory entry (with ZIP64 extra), then trailers.
#[test]
fn single_file_structure() {
let content = b"hello, zip!"; let name = "hello.txt";
let zip = collect_archive(|archive, out| {
let mut buf = BytesMut::new();
archive.start_file(
name.into(),
MsDosDateTime::default(),
CompressionMethod::Stored,
&mut buf,
);
emit(&mut buf, out);
// Payload is fed in chunks; the test itself writes the raw bytes to `out`.
for chunk in content.chunks(8) {
archive.file_data(chunk);
out.extend_from_slice(chunk);
}
archive.end_file(&mut buf);
emit(&mut buf, out);
archive.finish(&mut buf);
emit(&mut buf, out);
});
// Local header (30 fixed bytes + name).
assert_eq!(u32le(&zip, 0), SIG_LOCAL, "local sig");
assert_eq!(u16le(&zip, 4), VERSION_NEEDED, "version needed");
assert_eq!(u16le(&zip, 6), 0x0808, "GP bit 3 + bit 11");
assert_eq!(u32le(&zip, 14), 0, "crc deferred");
assert_eq!(u32le(&zip, 18), 0, "comp size deferred");
assert_eq!(u32le(&zip, 22), 0, "orig size deferred");
let name_len = u16le(&zip, 26) as usize;
assert_eq!(name_len, 9);
let extra_len = u16le(&zip, 28) as usize;
assert_eq!(extra_len, 0);
assert_eq!(&zip[30..30 + name_len], name.as_bytes());
// Data descriptor directly follows the payload.
let dd = 30 + name_len + content.len();
assert_eq!(u32le(&zip, dd), SIG_DATA_DESC);
let expected_crc = {
let mut h = Crc32Hasher::new();
h.update(content);
h.finalize()
};
assert_eq!(u32le(&zip, dd + 4), expected_crc, "crc32");
assert_eq!(u64le(&zip, dd + 8), content.len() as u64, "comp size64");
assert_eq!(u64le(&zip, dd + 16), content.len() as u64, "orig size64");
// Central directory entry follows the 24-byte data descriptor.
let cd = dd + 24;
assert_eq!(u32le(&zip, cd), SIG_CENTRAL);
assert_eq!(u16le(&zip, cd + 4), VERSION_MADE_BY);
assert_eq!(u32le(&zip, cd + 20), 0xFFFF_FFFF, "comp size sentinel");
assert_eq!(u32le(&zip, cd + 24), 0xFFFF_FFFF, "orig size sentinel");
assert_eq!(u32le(&zip, cd + 42), 0xFFFF_FFFF, "offset sentinel");
let cd_name_len = u16le(&zip, cd + 28) as usize;
let cd_extra_len = u16le(&zip, cd + 30) as usize;
assert_eq!(cd_name_len, 9);
assert_eq!(cd_extra_len, 28);
assert_eq!(u32le(&zip, cd + 16), expected_crc, "cd crc32");
// ZIP64 extra field sits right after the name.
let cex = cd + 46 + cd_name_len;
assert_eq!(u16le(&zip, cex), TAG_ZIP64);
assert_eq!(u16le(&zip, cex + 2), 24);
assert_eq!(u64le(&zip, cex + 4), content.len() as u64, "cd orig size64");
assert_eq!(
u64le(&zip, cex + 12),
content.len() as u64,
"cd comp size64"
);
assert_eq!(u64le(&zip, cex + 20), 0u64, "local offset = 0");
// Trailer records: zip64 eocd (56), locator (20), eocd (22).
let cd_entry_size = 46 + cd_name_len + cd_extra_len;
let z64 = cd + cd_entry_size;
assert_eq!(u32le(&zip, z64), SIG_ZIP64_EOCD);
assert_eq!(u64le(&zip, z64 + 24), 1u64, "one entry");
assert_eq!(u64le(&zip, z64 + 40), cd_entry_size as u64, "cd size");
assert_eq!(u64le(&zip, z64 + 48), cd as u64, "cd offset");
let loc = z64 + 56;
assert_eq!(u32le(&zip, loc), SIG_ZIP64_EOCD_LOC);
assert_eq!(u64le(&zip, loc + 8), z64 as u64, "zip64 eocd offset");
let eocd = loc + 20;
assert_eq!(u32le(&zip, eocd), SIG_EOCD);
assert_eq!(zip.len(), eocd + 22);
}
// A path ending in '/' must carry the directory file-type bits in the high
// half of the external attributes.
#[test]
fn directory_entry() {
let path = "subdir/";
let zip = collect_archive(|archive, out| {
let mut buf = BytesMut::new();
archive.add_directory(path.into(), MsDosDateTime::default(), &mut buf);
emit(&mut buf, out);
archive.finish(&mut buf);
emit(&mut buf, out);
});
let name_len = u16le(&zip, 26) as usize;
// Local header + name + 24-byte data descriptor, no payload.
let cd = 30 + name_len + 24;
assert_eq!(u32le(&zip, cd), SIG_CENTRAL, "CD sig");
let ext_attr = u32le(&zip, cd + 38);
assert_eq!(ext_attr >> 16 & 0o170_000, 0o040_000, "S_IFDIR bit");
}
// With two stored files, the second entry's recorded local offset must equal
// the total size of everything emitted for the first entry.
#[test]
fn multiple_entries_offsets() {
let a_data = b"aaaa";
let b_data = b"bbbbbbbb";
let zip = collect_archive(|archive, out| {
let mut buf = BytesMut::new();
archive.start_file(
"a.txt".into(),
MsDosDateTime::default(),
CompressionMethod::Stored,
&mut buf,
);
emit(&mut buf, out);
archive.file_data(a_data);
out.extend_from_slice(a_data);
archive.end_file(&mut buf);
emit(&mut buf, out);
archive.start_file(
"b.txt".into(),
MsDosDateTime::default(),
CompressionMethod::Stored,
&mut buf,
);
emit(&mut buf, out);
archive.file_data(b_data);
out.extend_from_slice(b_data);
archive.end_file(&mut buf);
emit(&mut buf, out);
archive.finish(&mut buf);
emit(&mut buf, out);
});
// First entry = 30-byte header + name + 4 payload bytes + 24-byte descriptor.
let name_len_a = u16le(&zip, 26) as usize; let local_b = (30 + name_len_a + 4 + 24) as u64;
assert_eq!(u32le(&zip, local_b as usize), SIG_LOCAL, "second local sig");
// Locate the central directory by walking backwards from the end of the archive.
let eocd_std = zip.len() - 22;
let loc_off = eocd_std - 20;
let z64_off = u64le(&zip, loc_off + 8) as usize;
let cd_offset = u64le(&zip, z64_off + 48) as usize;
let name_len_cd_a = u16le(&zip, cd_offset + 28) as usize;
let extra_len_cd_a = u16le(&zip, cd_offset + 30) as usize;
let cex_a = cd_offset + 46 + name_len_cd_a;
assert_eq!(u64le(&zip, cex_a + 20), 0, "first entry local offset = 0");
let cd_b = cd_offset + 46 + name_len_cd_a + extra_len_cd_a;
let cex_b = cd_b + 46 + u16le(&zip, cd_b + 28) as usize;
assert_eq!(
u64le(&zip, cex_b + 20),
local_b,
"second entry local offset"
);
}
}