Skip to main content

oxihuman_export/
zip_pack.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4#![allow(dead_code)]
5
6//! Minimal hand-crafted ZIP file writer (STORE compression only, no external zip crate).
7//!
8//! Supports writing and reading ZIP archives with:
9//! - Local file headers (signature 0x04034b50)
10//! - Central directory entries
11//! - End of central directory record (signature 0x06054b50)
12
13use std::path::Path;
14
15// ── little-endian helpers ────────────────────────────────────────────────────
16
/// Append `v` to `buf` as two bytes, least-significant byte first.
fn write_u16_le(buf: &mut Vec<u8>, v: u16) {
    buf.extend_from_slice(&v.to_le_bytes());
}
21
/// Append `v` to `buf` as four bytes, least-significant byte first.
fn write_u32_le(buf: &mut Vec<u8>, v: u32) {
    buf.extend_from_slice(&v.to_le_bytes());
}
28
/// Decode the little-endian `u16` stored at `data[offset..offset + 2]`.
///
/// Indexes `data` directly, so it panics if those two bytes are not in bounds.
fn read_u16_le(data: &[u8], offset: usize) -> u16 {
    let lo = data[offset];
    let hi = data[offset + 1];
    u16::from_le_bytes([lo, hi])
}
32
/// Decode the little-endian `u32` stored at `data[offset..offset + 4]`.
///
/// Indexes `data` directly, so it panics if those four bytes are not in bounds.
fn read_u32_le(data: &[u8], offset: usize) -> u32 {
    let raw = [
        data[offset],
        data[offset + 1],
        data[offset + 2],
        data[offset + 3],
    ];
    u32::from_le_bytes(raw)
}
39
40// ── CRC-32 (IEEE polynomial, table-based) ───────────────────────────────────
41
/// Compute standard CRC-32 (IEEE polynomial 0xEDB88320) of `data`.
///
/// Uses a 256-entry lookup table that is evaluated at compile time and stored
/// in an immutable `static`, so no per-call table construction and no mutable
/// global state are involved.
///
/// Returns `0` for empty input.
pub fn crc32(data: &[u8]) -> u32 {
    /// Build the reflected CRC-32 table: entry `i` is `i` pushed through eight
    /// shift/conditional-xor steps with the polynomial 0xEDB88320.
    const fn build_table() -> [u32; 256] {
        let mut table = [0u32; 256];
        let mut i = 0;
        while i < 256 {
            let mut c = i as u32;
            let mut bit = 0;
            while bit < 8 {
                c = if c & 1 != 0 {
                    0xEDB8_8320 ^ (c >> 1)
                } else {
                    c >> 1
                };
                bit += 1;
            }
            table[i] = c;
            i += 1;
        }
        table
    }

    static TABLE: [u32; 256] = build_table();

    // Start from all ones, consume one byte per step, then invert the result.
    let crc = data.iter().fold(0xFFFF_FFFFu32, |crc, &byte| {
        let idx = ((crc ^ u32::from(byte)) & 0xFF) as usize;
        TABLE[idx] ^ (crc >> 8)
    });
    crc ^ 0xFFFF_FFFF
}
65
66// ── Public types ─────────────────────────────────────────────────────────────
67
/// A single file entry to be placed inside a ZIP archive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZipEntry {
    /// Name recorded for the entry; its UTF-8 bytes are written verbatim into the headers.
    pub filename: String,
    /// Raw file contents, stored uncompressed.
    pub data: Vec<u8>,
}
73
/// Result metadata returned after writing a ZIP archive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ZipPackResult {
    /// Location the archive was written to.
    pub path: std::path::PathBuf,
    /// Number of entries placed in the archive.
    pub entry_count: usize,
    /// Sum of the entries' payload lengths in bytes (headers excluded).
    pub total_bytes: usize,
    /// Size in bytes of the complete archive, headers included.
    pub zip_size_bytes: usize,
}
81
82// ── ZIP constants ─────────────────────────────────────────────────────────────
83
/// Signature opening each local file header (bytes `PK\x03\x04` once written little-endian).
const LOCAL_FILE_HEADER_SIG: u32 = 0x0403_4B50;
/// Signature opening each central directory entry (bytes `PK\x01\x02`).
const CENTRAL_DIR_SIG: u32 = 0x0201_4B50;
/// Signature opening the end-of-central-directory record (bytes `PK\x05\x06`).
const END_OF_CENTRAL_DIR_SIG: u32 = 0x0605_4B50;

/// Value written to the "version needed to extract" header field.
const VERSION_NEEDED: u16 = 20;
/// Value written to the "version made by" header field (MS-DOS + compatible).
const VERSION_MADE_BY: u16 = 20;
90
91// ── Core ZIP builder ──────────────────────────────────────────────────────────
92
93/// Build ZIP archive bytes entirely in memory.
94///
95/// Uses STORE compression (no deflate). Safe for WASM targets.
96pub fn zip_bytes(entries: &[ZipEntry]) -> Vec<u8> {
97    let mut buf: Vec<u8> = Vec::new();
98
99    // Track (local_header_offset, crc32, compressed_size, filename_bytes) per entry
100    struct EntryMeta {
101        offset: u32,
102        crc: u32,
103        size: u32,
104        filename: Vec<u8>,
105    }
106
107    let mut metas: Vec<EntryMeta> = Vec::with_capacity(entries.len());
108
109    // ── Local file entries ──────────────────────────────────────────────────
110    for entry in entries {
111        let fname_bytes = entry.filename.as_bytes();
112        let data_len = entry.data.len() as u32;
113        let crc = crc32(&entry.data);
114        let offset = buf.len() as u32;
115
116        // Local file header (30 bytes + filename)
117        write_u32_le(&mut buf, LOCAL_FILE_HEADER_SIG); // signature
118        write_u16_le(&mut buf, VERSION_NEEDED); // version needed
119        write_u16_le(&mut buf, 0); // general purpose bit flag
120        write_u16_le(&mut buf, 0); // compression method: STORE
121        write_u16_le(&mut buf, 0); // last mod file time
122        write_u16_le(&mut buf, 0); // last mod file date
123        write_u32_le(&mut buf, crc); // crc-32
124        write_u32_le(&mut buf, data_len); // compressed size
125        write_u32_le(&mut buf, data_len); // uncompressed size
126        write_u16_le(&mut buf, fname_bytes.len() as u16); // filename length
127        write_u16_le(&mut buf, 0); // extra field length
128
129        buf.extend_from_slice(fname_bytes); // filename
130        buf.extend_from_slice(&entry.data); // file data
131
132        metas.push(EntryMeta {
133            offset,
134            crc,
135            size: data_len,
136            filename: fname_bytes.to_vec(),
137        });
138    }
139
140    // ── Central directory ───────────────────────────────────────────────────
141    let central_dir_start = buf.len() as u32;
142
143    for meta in &metas {
144        write_u32_le(&mut buf, CENTRAL_DIR_SIG); // signature
145        write_u16_le(&mut buf, VERSION_MADE_BY); // version made by
146        write_u16_le(&mut buf, VERSION_NEEDED); // version needed to extract
147        write_u16_le(&mut buf, 0); // general purpose bit flag
148        write_u16_le(&mut buf, 0); // compression method: STORE
149        write_u16_le(&mut buf, 0); // last mod file time
150        write_u16_le(&mut buf, 0); // last mod file date
151        write_u32_le(&mut buf, meta.crc); // crc-32
152        write_u32_le(&mut buf, meta.size); // compressed size
153        write_u32_le(&mut buf, meta.size); // uncompressed size
154        write_u16_le(&mut buf, meta.filename.len() as u16); // filename length
155        write_u16_le(&mut buf, 0); // extra field length
156        write_u16_le(&mut buf, 0); // file comment length
157        write_u16_le(&mut buf, 0); // disk number start
158        write_u16_le(&mut buf, 0); // internal file attributes
159        write_u32_le(&mut buf, 0); // external file attributes
160        write_u32_le(&mut buf, meta.offset); // relative offset of local header
161        buf.extend_from_slice(&meta.filename); // filename
162    }
163
164    let central_dir_size = buf.len() as u32 - central_dir_start;
165
166    // ── End of central directory record ────────────────────────────────────
167    write_u32_le(&mut buf, END_OF_CENTRAL_DIR_SIG); // signature
168    write_u16_le(&mut buf, 0); // disk number
169    write_u16_le(&mut buf, 0); // disk with start of central directory
170    write_u16_le(&mut buf, entries.len() as u16); // entries on this disk
171    write_u16_le(&mut buf, entries.len() as u16); // total entries
172    write_u32_le(&mut buf, central_dir_size); // size of central directory
173    write_u32_le(&mut buf, central_dir_start); // offset of central directory
174    write_u16_le(&mut buf, 0); // comment length
175
176    buf
177}
178
179// ── Public API ────────────────────────────────────────────────────────────────
180
181/// Write a ZIP archive to `path`.
182///
183/// Uses STORE compression (no deflate). Returns metadata about the written archive.
184pub fn write_zip(entries: &[ZipEntry], path: &Path) -> anyhow::Result<ZipPackResult> {
185    let bytes = zip_bytes(entries);
186    let zip_size = bytes.len();
187    let total_bytes: usize = entries.iter().map(|e| e.data.len()).sum();
188
189    std::fs::write(path, &bytes)?;
190
191    Ok(ZipPackResult {
192        path: path.to_path_buf(),
193        entry_count: entries.len(),
194        total_bytes,
195        zip_size_bytes: zip_size,
196    })
197}
198
199/// Scan the central directory and return the list of filenames stored in the ZIP.
200pub fn read_zip_entry_names(path: &Path) -> anyhow::Result<Vec<String>> {
201    let data = std::fs::read(path)?;
202    read_zip_entry_names_from_bytes(&data)
203}
204
205/// Inner helper that works on raw bytes (used by tests and public API).
206fn read_zip_entry_names_from_bytes(data: &[u8]) -> anyhow::Result<Vec<String>> {
207    // Find end-of-central-directory record by searching backwards for its signature.
208    // The EOCD is at least 22 bytes; comment may follow (max 65535 bytes).
209    if data.len() < 22 {
210        anyhow::bail!("data too short to be a valid ZIP archive");
211    }
212
213    let eocd_offset = find_eocd(data)
214        .ok_or_else(|| anyhow::anyhow!("end-of-central-directory record not found"))?;
215
216    // Parse EOCD
217    // offset +4 : disk number (2)
218    // offset +6 : disk with cd start (2)
219    // offset +8 : entries on this disk (2)
220    // offset +10: total entries (2)
221    // offset +12: cd size (4)
222    // offset +16: cd offset (4)
223    let cd_offset = read_u32_le(data, eocd_offset + 16) as usize;
224    let total_entries = read_u16_le(data, eocd_offset + 10) as usize;
225
226    let mut names = Vec::with_capacity(total_entries);
227    let mut pos = cd_offset;
228
229    for _ in 0..total_entries {
230        if pos + 46 > data.len() {
231            anyhow::bail!("central directory entry truncated at offset {pos}");
232        }
233        let sig = read_u32_le(data, pos);
234        if sig != CENTRAL_DIR_SIG {
235            anyhow::bail!("expected central directory signature at offset {pos}");
236        }
237        let fname_len = read_u16_le(data, pos + 28) as usize;
238        let extra_len = read_u16_le(data, pos + 30) as usize;
239        let comment_len = read_u16_le(data, pos + 32) as usize;
240
241        let fname_start = pos + 46;
242        if fname_start + fname_len > data.len() {
243            anyhow::bail!("filename extends past end of data");
244        }
245        let fname_bytes = &data[fname_start..fname_start + fname_len];
246        let fname = String::from_utf8_lossy(fname_bytes).into_owned();
247        names.push(fname);
248
249        pos += 46 + fname_len + extra_len + comment_len;
250    }
251
252    Ok(names)
253}
254
/// Search backwards from the end of `data` for the EOCD signature.
///
/// Only positions that leave room for a complete 22-byte record are examined,
/// and the search reaches back at most 65535 further bytes (the largest
/// possible trailing comment). Returns the offset of the last matching
/// signature, or `None` if there is none, including when `data` is shorter
/// than 22 bytes.
fn find_eocd(data: &[u8]) -> Option<usize> {
    // EOCD signature as little-endian bytes: 50 4B 05 06
    const EOCD_SIG_BYTES: [u8; 4] = [0x50, 0x4B, 0x05, 0x06];
    // Fixed size of the EOCD record without its trailing comment.
    const EOCD_MIN_LEN: usize = 22;
    // The comment length field is 16 bits wide.
    const MAX_COMMENT_LEN: usize = 65535;

    // Too short to hold an EOCD at all; this also keeps the 4-byte slice
    // below in bounds for every candidate position.
    let last_start = data.len().checked_sub(EOCD_MIN_LEN)?;
    let first_start = last_start.saturating_sub(MAX_COMMENT_LEN);

    (first_start..=last_start)
        .rev()
        .find(|&i| data[i..i + 4] == EOCD_SIG_BYTES)
}
268
269/// Bundle `mesh.glb`, `params.json`, and `manifest.json` into a single ZIP archive.
270pub fn pack_mesh_assets(
271    mesh_glb: &[u8],
272    params_json: &[u8],
273    manifest_json: &[u8],
274    path: &Path,
275) -> anyhow::Result<ZipPackResult> {
276    let entries = vec![
277        ZipEntry {
278            filename: "mesh.glb".to_string(),
279            data: mesh_glb.to_vec(),
280        },
281        ZipEntry {
282            filename: "params.json".to_string(),
283            data: params_json.to_vec(),
284        },
285        ZipEntry {
286            filename: "manifest.json".to_string(),
287            data: manifest_json.to_vec(),
288        },
289    ];
290    write_zip(&entries, path)
291}
292
293/// Check that the archive at `path` ends with a valid end-of-central-directory record.
294pub fn validate_zip(path: &Path) -> anyhow::Result<bool> {
295    let data = std::fs::read(path)?;
296    Ok(find_eocd(&data).is_some())
297}
298
299// ── Tests ─────────────────────────────────────────────────────────────────────
300
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Build a scratch file path under the platform temp directory.
    ///
    /// The process id is included so concurrent test runs on the same machine
    /// do not overwrite each other's files.
    fn scratch_path(stem: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "oxihuman_zip_pack_{}_{}.zip",
            stem,
            std::process::id()
        ))
    }

    /// Best-effort removal of a scratch file; failures are irrelevant to the test outcome.
    fn cleanup(path: &PathBuf) {
        let _ = std::fs::remove_file(path);
    }

    // ── CRC-32 known values ──────────────────────────────────────────────────

    #[test]
    fn test_crc32_empty() {
        // CRC-32 of empty data is defined as 0x00000000
        assert_eq!(crc32(&[]), 0x0000_0000);
    }

    #[test]
    fn test_crc32_hello() {
        // Standard CRC-32("Hello World") = 0x4A17B156
        assert_eq!(crc32(b"Hello World"), 0x4A17B156);
    }

    #[test]
    fn test_crc32_abc() {
        // Standard CRC-32("abc") = 0x352441C2
        assert_eq!(crc32(b"abc"), 0x352441C2);
    }

    #[test]
    fn test_crc32_single_zero() {
        // CRC-32([0x00]) = 0xD202EF8D
        assert_eq!(crc32(&[0x00]), 0xD202EF8D);
    }

    #[test]
    fn test_crc32_all_ones_byte() {
        // CRC-32([0xFF]) = 0xFF000000
        assert_eq!(crc32(&[0xFF]), 0xFF000000);
    }

    #[test]
    fn test_crc32_deterministic() {
        // Same data always produces same result
        let data = b"oxihuman mesh export";
        assert_eq!(crc32(data), crc32(data));
    }

    #[test]
    fn test_crc32_different_data_different_result() {
        assert_ne!(crc32(b"alpha"), crc32(b"beta"));
    }

    // ── write_zip / read_zip_entry_names round-trip ──────────────────────────

    #[test]
    fn test_write_zip_round_trip() {
        let path = scratch_path("roundtrip");
        let entries = vec![
            ZipEntry {
                filename: "hello.txt".to_string(),
                data: b"Hello ZIP!".to_vec(),
            },
            ZipEntry {
                filename: "world.bin".to_string(),
                data: vec![0x01, 0x02, 0x03],
            },
        ];
        let result = write_zip(&entries, &path).expect("write_zip failed");
        assert_eq!(result.entry_count, 2);
        assert_eq!(result.total_bytes, 13); // 10 + 3

        let names = read_zip_entry_names(&path).expect("read_zip_entry_names failed");
        assert_eq!(names, vec!["hello.txt", "world.bin"]);
        cleanup(&path);
    }

    #[test]
    fn test_write_zip_single_entry() {
        let path = scratch_path("single");
        let entries = vec![ZipEntry {
            filename: "data.bin".to_string(),
            data: vec![42u8; 100],
        }];
        let result = write_zip(&entries, &path).expect("write_zip failed");
        assert_eq!(result.entry_count, 1);
        assert_eq!(result.total_bytes, 100);
        assert!(result.zip_size_bytes > 100); // overhead from headers
        cleanup(&path);
    }

    // ── zip_bytes in-memory ───────────────────────────────────────────────────

    #[test]
    fn test_zip_bytes_non_empty() {
        let entries = vec![ZipEntry {
            filename: "test.txt".to_string(),
            data: b"WASM test".to_vec(),
        }];
        let bytes = zip_bytes(&entries);
        // Must start with local file header signature PK\x03\x04
        assert_eq!(&bytes[0..4], &[0x50, 0x4B, 0x03, 0x04]);
        // Must end with EOCD signature PK\x05\x06
        let eocd_pos = bytes.len() - 22;
        assert_eq!(&bytes[eocd_pos..eocd_pos + 4], &[0x50, 0x4B, 0x05, 0x06]);
    }

    #[test]
    fn test_zip_bytes_empty_entries() {
        // An empty ZIP should still have a valid EOCD
        let bytes = zip_bytes(&[]);
        assert_eq!(bytes.len(), 22); // only EOCD
        assert_eq!(&bytes[0..4], &[0x50, 0x4B, 0x05, 0x06]);
    }

    #[test]
    fn test_zip_bytes_entry_names_roundtrip() {
        let entries = vec![
            ZipEntry {
                filename: "mesh.glb".to_string(),
                data: vec![0u8; 64],
            },
            ZipEntry {
                filename: "params.json".to_string(),
                data: b"{}".to_vec(),
            },
        ];
        let bytes = zip_bytes(&entries);
        let names = read_zip_entry_names_from_bytes(&bytes).expect("parse failed");
        assert_eq!(names, vec!["mesh.glb", "params.json"]);
    }

    // ── pack_mesh_assets ─────────────────────────────────────────────────────

    #[test]
    fn test_pack_mesh_assets() {
        let path = scratch_path("mesh");
        let glb = vec![0x67, 0x6C, 0x54, 0x46]; // "glTF" magic
        let params = b"{\"height\": 180}";
        let manifest = b"{\"version\": 1}";

        let result =
            pack_mesh_assets(&glb, params, manifest, &path).expect("pack_mesh_assets failed");
        assert_eq!(result.entry_count, 3);
        assert_eq!(
            result.total_bytes,
            glb.len() + params.len() + manifest.len()
        );

        let names = read_zip_entry_names(&path).expect("read names failed");
        assert!(names.contains(&"mesh.glb".to_string()));
        assert!(names.contains(&"params.json".to_string()));
        assert!(names.contains(&"manifest.json".to_string()));
        cleanup(&path);
    }

    // ── validate_zip ─────────────────────────────────────────────────────────

    #[test]
    fn test_validate_zip_valid() {
        let path = scratch_path("validate_valid");
        let entries = vec![ZipEntry {
            filename: "a.txt".to_string(),
            data: b"hello".to_vec(),
        }];
        write_zip(&entries, &path).expect("write_zip failed");
        let valid = validate_zip(&path).expect("validate_zip failed");
        assert!(valid);
        cleanup(&path);
    }

    #[test]
    fn test_validate_zip_invalid() {
        let path = scratch_path("validate_invalid");
        // Write garbage bytes — no EOCD signature
        std::fs::write(&path, b"not a zip file at all!!!").expect("write failed");
        let valid = validate_zip(&path).expect("validate_zip call failed");
        assert!(!valid);
        cleanup(&path);
    }

    // ── empty ZIP file ────────────────────────────────────────────────────────

    #[test]
    fn test_write_empty_zip() {
        let path = scratch_path("empty");
        let result = write_zip(&[], &path).expect("write_zip failed");
        assert_eq!(result.entry_count, 0);
        assert_eq!(result.total_bytes, 0);
        // An empty ZIP is 22 bytes (just the EOCD)
        assert_eq!(result.zip_size_bytes, 22);

        let names = read_zip_entry_names(&path).expect("read names failed");
        assert!(names.is_empty());

        let valid = validate_zip(&path).expect("validate failed");
        assert!(valid);
        cleanup(&path);
    }

    // ── ZipPackResult fields ──────────────────────────────────────────────────

    #[test]
    fn test_zip_pack_result_path() {
        let path = scratch_path("result_path");
        let entries = vec![ZipEntry {
            filename: "x.bin".to_string(),
            data: vec![1, 2, 3, 4, 5],
        }];
        let result = write_zip(&entries, &path).expect("write_zip failed");
        assert_eq!(result.path, path);
        assert!(result.zip_size_bytes >= result.total_bytes);
        cleanup(&path);
    }

    // ── CRC stored in local header matches recomputed CRC ────────────────────

    #[test]
    fn test_local_header_crc_correct() {
        let data = b"check my crc";
        let entries = vec![ZipEntry {
            filename: "crc_test.txt".to_string(),
            data: data.to_vec(),
        }];
        let bytes = zip_bytes(&entries);
        // Local file header: 4(sig) + 2 + 2 + 2 + 2 + 2 + 4(crc) starts at offset 14
        let stored_crc = read_u32_le(&bytes, 14);
        assert_eq!(stored_crc, crc32(data));
    }
}