Skip to main content

lo_zip/
lib.rs

1use std::fs::File;
2use std::io::{Cursor, Read, Seek, Write};
3use std::path::Path;
4
5use lo_core::{LoError, Result};
6
7pub mod archive;
8pub use archive::{
9    normalize_zip_path, rels_path_for, resolve_part_target, ZipArchive, ZipEntryMeta,
10};
11
/// One file to be placed in a ZIP archive: its name inside the archive and
/// the bytes that make up its contents.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ZipEntry {
    /// Name of the entry as it is written into the archive headers.
    pub name: String,
    /// Contents of the entry.
    pub data: Vec<u8>,
}

impl ZipEntry {
    /// Builds an entry from anything that converts into an owned name and an
    /// owned byte buffer (for example `&str` and `Vec<u8>`).
    pub fn new(name: impl Into<String>, data: impl Into<Vec<u8>>) -> Self {
        let (name, data) = (name.into(), data.into());
        ZipEntry { name, data }
    }
}
26
/// Summary of one central directory record read from an existing archive.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct CentralEntry {
    /// Entry name; the raw name bytes are decoded lossily as UTF-8.
    pub name: String,
    /// Compressed size field of the record, in bytes.
    pub compressed_size: u32,
    /// Uncompressed size field of the record, in bytes.
    pub uncompressed_size: u32,
    /// Offset of the entry's local file header, as recorded in the archive.
    pub local_header_offset: u32,
}
34
35fn write_u16_le<W: Write>(writer: &mut W, value: u16) -> Result<()> {
36    writer.write_all(&value.to_le_bytes())?;
37    Ok(())
38}
39
40fn write_u32_le<W: Write>(writer: &mut W, value: u32) -> Result<()> {
41    writer.write_all(&value.to_le_bytes())?;
42    Ok(())
43}
44
45fn read_u16_le(slice: &[u8], offset: usize) -> Result<u16> {
46    let bytes = slice
47        .get(offset..offset + 2)
48        .ok_or_else(|| LoError::Parse("unexpected end of ZIP file".to_string()))?;
49    Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
50}
51
52fn read_u32_le(slice: &[u8], offset: usize) -> Result<u32> {
53    let bytes = slice
54        .get(offset..offset + 4)
55        .ok_or_else(|| LoError::Parse("unexpected end of ZIP file".to_string()))?;
56    Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
57}
58
/// Computes the CRC-32 checksum of `bytes`.
///
/// Bitwise implementation over the reflected polynomial `0xEDB88320`, with
/// an all-ones initial register and a final bitwise inversion.
pub fn crc32(bytes: &[u8]) -> u32 {
    const REFLECTED_POLY: u32 = 0xedb8_8320;

    let register = bytes.iter().fold(u32::MAX, |state, &byte| {
        let mut lane = (state ^ u32::from(byte)) & 0xff;
        for _ in 0..8 {
            // All ones when the low bit is set, zero otherwise, so the
            // polynomial is folded in only for a set bit.
            let mask = (lane & 1).wrapping_neg();
            lane = (lane >> 1) ^ (REFLECTED_POLY & mask);
        }
        (state >> 8) ^ lane
    });

    !register
}
74
75pub fn write_zip<W: Write + Seek>(writer: &mut W, entries: &[ZipEntry]) -> Result<()> {
76    struct CentralRecord {
77        name: String,
78        crc: u32,
79        size: u32,
80        offset: u32,
81    }
82
83    let mut records = Vec::with_capacity(entries.len());
84
85    for entry in entries {
86        let offset = writer.stream_position()? as u32;
87        let name_bytes = entry.name.as_bytes();
88        let data = &entry.data;
89        let size = u32::try_from(data.len())
90            .map_err(|_| LoError::InvalidInput("ZIP entry too large".to_string()))?;
91        let crc = crc32(data);
92
93        write_u32_le(writer, 0x0403_4b50)?;
94        write_u16_le(writer, 20)?;
95        write_u16_le(writer, 0)?;
96        write_u16_le(writer, 0)?;
97        write_u16_le(writer, 0)?;
98        write_u16_le(writer, 0)?;
99        write_u32_le(writer, crc)?;
100        write_u32_le(writer, size)?;
101        write_u32_le(writer, size)?;
102        write_u16_le(
103            writer,
104            u16::try_from(name_bytes.len())
105                .map_err(|_| LoError::InvalidInput("ZIP entry name too long".to_string()))?,
106        )?;
107        write_u16_le(writer, 0)?;
108        writer.write_all(name_bytes)?;
109        writer.write_all(data)?;
110
111        records.push(CentralRecord {
112            name: entry.name.clone(),
113            crc,
114            size,
115            offset,
116        });
117    }
118
119    let central_directory_offset = writer.stream_position()? as u32;
120
121    for record in &records {
122        let name_bytes = record.name.as_bytes();
123        write_u32_le(writer, 0x0201_4b50)?;
124        write_u16_le(writer, 20)?;
125        write_u16_le(writer, 20)?;
126        write_u16_le(writer, 0)?;
127        write_u16_le(writer, 0)?;
128        write_u16_le(writer, 0)?;
129        write_u16_le(writer, 0)?;
130        write_u32_le(writer, record.crc)?;
131        write_u32_le(writer, record.size)?;
132        write_u32_le(writer, record.size)?;
133        write_u16_le(
134            writer,
135            u16::try_from(name_bytes.len())
136                .map_err(|_| LoError::InvalidInput("ZIP entry name too long".to_string()))?,
137        )?;
138        write_u16_le(writer, 0)?;
139        write_u16_le(writer, 0)?;
140        write_u16_le(writer, 0)?;
141        write_u16_le(writer, 0)?;
142        write_u32_le(writer, 0)?;
143        write_u32_le(writer, record.offset)?;
144        writer.write_all(name_bytes)?;
145    }
146
147    let central_directory_size = (writer.stream_position()? as u32) - central_directory_offset;
148
149    write_u32_le(writer, 0x0605_4b50)?;
150    write_u16_le(writer, 0)?;
151    write_u16_le(writer, 0)?;
152    write_u16_le(
153        writer,
154        u16::try_from(records.len())
155            .map_err(|_| LoError::InvalidInput("too many ZIP entries".to_string()))?,
156    )?;
157    write_u16_le(
158        writer,
159        u16::try_from(records.len())
160            .map_err(|_| LoError::InvalidInput("too many ZIP entries".to_string()))?,
161    )?;
162    write_u32_le(writer, central_directory_size)?;
163    write_u32_le(writer, central_directory_offset)?;
164    write_u16_le(writer, 0)?;
165    Ok(())
166}
167
168pub fn write_zip_file(path: impl AsRef<Path>, entries: &[ZipEntry]) -> Result<()> {
169    let mut file = File::create(path)?;
170    write_zip(&mut file, entries)
171}
172
173/// Serialize a list of ZIP entries into an in-memory `Vec<u8>`.
174///
175/// Convenient for callers that want to package an OOXML/ODF document and
176/// then hand the bytes to a downstream consumer (HTTP, embedding, hashing).
177pub fn write_zip_to_vec(entries: &[ZipEntry]) -> Result<Vec<u8>> {
178    let mut cursor = Cursor::new(Vec::new());
179    write_zip(&mut cursor, entries)?;
180    Ok(cursor.into_inner())
181}
182
183/// Build an OOXML package (`.docx`/`.xlsx`/`.pptx`) from a list of entries.
184///
185/// OOXML packages are just plain ZIPs without the special "mimetype must be
186/// the first stored file" requirement that ODF imposes, so this is a thin
187/// wrapper around [`write_zip_to_vec`] for readability at call sites.
188pub fn ooxml_package(entries: &[ZipEntry]) -> Result<Vec<u8>> {
189    write_zip_to_vec(entries)
190}
191
192/// Build an ODF package (`.odt`/`.ods`/`.odp`/`.odg`/`.odb`/`.odf`).
193///
194/// ODF requires the `mimetype` entry to be the first file in the archive
195/// and stored uncompressed; we already store everything uncompressed so we
196/// just need to make sure `mimetype` comes first.
197pub fn odf_package(mimetype: &str, mut entries: Vec<ZipEntry>) -> Result<Vec<u8>> {
198    let mut all = Vec::with_capacity(entries.len() + 1);
199    all.push(ZipEntry::new("mimetype", mimetype.as_bytes().to_vec()));
200    all.append(&mut entries);
201    write_zip_to_vec(&all)
202}
203
204pub fn list_entries(path: impl AsRef<Path>) -> Result<Vec<CentralEntry>> {
205    let mut file = File::open(path)?;
206    let mut data = Vec::new();
207    file.read_to_end(&mut data)?;
208
209    let signature = [0x50, 0x4b, 0x05, 0x06];
210    let search_start = data.len().saturating_sub(66_000);
211    let eocd = (search_start..data.len().saturating_sub(3))
212        .rev()
213        .find(|&idx| data.get(idx..idx + 4) == Some(&signature))
214        .ok_or_else(|| LoError::Parse("could not find ZIP central directory".to_string()))?;
215
216    let entry_count = read_u16_le(&data, eocd + 10)? as usize;
217    let central_offset = read_u32_le(&data, eocd + 16)? as usize;
218
219    let mut cursor = central_offset;
220    let mut entries = Vec::with_capacity(entry_count);
221    for _ in 0..entry_count {
222        let sig = read_u32_le(&data, cursor)?;
223        if sig != 0x0201_4b50 {
224            return Err(LoError::Parse(
225                "invalid central directory header".to_string(),
226            ));
227        }
228        let compressed_size = read_u32_le(&data, cursor + 20)?;
229        let uncompressed_size = read_u32_le(&data, cursor + 24)?;
230        let name_len = read_u16_le(&data, cursor + 28)? as usize;
231        let extra_len = read_u16_le(&data, cursor + 30)? as usize;
232        let comment_len = read_u16_le(&data, cursor + 32)? as usize;
233        let local_header_offset = read_u32_le(&data, cursor + 42)?;
234        let name_start = cursor + 46;
235        let name_end = name_start + name_len;
236        let name_bytes = data
237            .get(name_start..name_end)
238            .ok_or_else(|| LoError::Parse("invalid central directory name".to_string()))?;
239        let name = String::from_utf8_lossy(name_bytes).to_string();
240        entries.push(CentralEntry {
241            name,
242            compressed_size,
243            uncompressed_size,
244            local_header_offset,
245        });
246        cursor = name_end + extra_len + comment_len;
247    }
248
249    Ok(entries)
250}
251
252pub fn read_entry(path: impl AsRef<Path>, entry_name: &str) -> Result<Vec<u8>> {
253    let mut file = File::open(path)?;
254    let mut data = Vec::new();
255    file.read_to_end(&mut data)?;
256
257    for entry in list_entries_from_bytes(&data)? {
258        if entry.name == entry_name {
259            let offset = entry.local_header_offset as usize;
260            let sig = read_u32_le(&data, offset)?;
261            if sig != 0x0403_4b50 {
262                return Err(LoError::Parse("invalid local ZIP header".to_string()));
263            }
264            let name_len = read_u16_le(&data, offset + 26)? as usize;
265            let extra_len = read_u16_le(&data, offset + 28)? as usize;
266            let body_start = offset + 30 + name_len + extra_len;
267            let body_end = body_start + entry.uncompressed_size as usize;
268            return data
269                .get(body_start..body_end)
270                .map(|slice| slice.to_vec())
271                .ok_or_else(|| LoError::Parse("invalid ZIP body range".to_string()));
272        }
273    }
274
275    Err(LoError::InvalidInput(format!(
276        "ZIP entry not found: {entry_name}"
277    )))
278}
279
280fn list_entries_from_bytes(data: &[u8]) -> Result<Vec<CentralEntry>> {
281    let signature = [0x50, 0x4b, 0x05, 0x06];
282    let search_start = data.len().saturating_sub(66_000);
283    let eocd = (search_start..data.len().saturating_sub(3))
284        .rev()
285        .find(|&idx| data.get(idx..idx + 4) == Some(&signature))
286        .ok_or_else(|| LoError::Parse("could not find ZIP central directory".to_string()))?;
287
288    let entry_count = read_u16_le(data, eocd + 10)? as usize;
289    let central_offset = read_u32_le(data, eocd + 16)? as usize;
290
291    let mut cursor = central_offset;
292    let mut entries = Vec::with_capacity(entry_count);
293    for _ in 0..entry_count {
294        let sig = read_u32_le(data, cursor)?;
295        if sig != 0x0201_4b50 {
296            return Err(LoError::Parse(
297                "invalid central directory header".to_string(),
298            ));
299        }
300        let compressed_size = read_u32_le(data, cursor + 20)?;
301        let uncompressed_size = read_u32_le(data, cursor + 24)?;
302        let name_len = read_u16_le(data, cursor + 28)? as usize;
303        let extra_len = read_u16_le(data, cursor + 30)? as usize;
304        let comment_len = read_u16_le(data, cursor + 32)? as usize;
305        let local_header_offset = read_u32_le(data, cursor + 42)?;
306        let name_start = cursor + 46;
307        let name_end = name_start + name_len;
308        let name_bytes = data
309            .get(name_start..name_end)
310            .ok_or_else(|| LoError::Parse("invalid central directory name".to_string()))?;
311        entries.push(CentralEntry {
312            name: String::from_utf8_lossy(name_bytes).to_string(),
313            compressed_size,
314            uncompressed_size,
315            local_header_offset,
316        });
317        cursor = name_end + extra_len + comment_len;
318    }
319    Ok(entries)
320}
321
#[cfg(test)]
mod tests {
    use super::{
        crc32, list_entries, ooxml_package, read_entry, write_zip_file, write_zip_to_vec, ZipEntry,
    };
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Checks the checksum of the ASCII digits "123456789" against a fixed value.
    #[test]
    fn crc32_matches_known_value() {
        let checksum = crc32(b"123456789");
        assert_eq!(checksum, 0xcbf4_3926);
    }

    /// Writes a two-entry archive to a temp file, then lists it and reads one entry back.
    #[test]
    fn zip_roundtrip_works() {
        // A nanosecond timestamp in the file name keeps runs from colliding.
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system time before unix epoch");
        let ts = since_epoch.as_nanos();
        let path = std::env::temp_dir().join(format!("lo_zip_{ts}.zip"));

        let contents = [
            ZipEntry::new("a.txt", b"hello".to_vec()),
            ZipEntry::new("dir/b.txt", b"world".to_vec()),
        ];
        write_zip_file(&path, &contents).expect("write zip");

        let listed = list_entries(&path).expect("list entries");
        assert_eq!(listed.len(), 2);

        let first = read_entry(&path, "a.txt").expect("read entry");
        assert_eq!(first, b"hello");

        // Best-effort cleanup; a failure to delete does not fail the test.
        let _ = std::fs::remove_file(path);
    }

    /// An in-memory archive begins with the "PK" magic bytes.
    #[test]
    fn write_to_vec_starts_with_pk_signature() {
        let single = [ZipEntry::new("a.txt", b"hi".to_vec())];
        let bytes = write_zip_to_vec(&single).expect("write zip to vec");
        assert!(bytes.starts_with(b"PK"));
    }

    /// An OOXML package built from two parts begins with the "PK" magic bytes.
    #[test]
    fn ooxml_package_is_valid_zip() {
        let parts = [
            ZipEntry::new("[Content_Types].xml", b"<Types/>".to_vec()),
            ZipEntry::new("word/document.xml", b"<doc/>".to_vec()),
        ];
        let bytes = ooxml_package(&parts).expect("build ooxml");
        assert!(bytes.starts_with(b"PK"));
    }
}
371}