bindle-file 0.0.1

an efficient binary archive format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
//! Bindle is a binary archive format for collecting files.
//!
//! The format uses memory-mapped I/O for fast reads, optional zstd compression,
//! and supports append-only writes with shadowing for updates.
//!
//! # Example
//!
//! ```no_run
//! use bindle_file::{Bindle, Compress};
//!
//! let mut archive = Bindle::open("data.bndl")?;
//! archive.add("file.txt", b"data", Compress::None)?;
//! archive.save()?;
//!
//! let data = archive.read("file.txt").unwrap();
//! # Ok::<(), std::io::Error>(())
//! ```

use std::io::{self, Write};

// Module declarations
// These modules are private: code outside the crate cannot name them directly and only
// sees the items re-exported further down.
mod bindle;
mod compress;
mod entry;
mod reader;
mod writer;

// `pub(crate)`: reachable by path from anywhere inside this crate, but not from outside it.
// NOTE(review): presumably the C-facing bindings live here — confirm against the module.
pub(crate) mod ffi;

// Public re-exports
// One item from each private module is surfaced at the crate root, so users write
// `bindle_file::Bindle` rather than `bindle_file::bindle::Bindle`.
pub use bindle::Bindle;
pub use compress::Compress;
pub use entry::Entry;
pub use reader::Reader;
pub use writer::Writer;

// Constants
/// Eight ASCII bytes, `BINDL001`.
/// NOTE(review): presumably the signature at the start of every archive that is checked on
/// open — confirm against the reader/writer code.
pub(crate) const BNDL_MAGIC: &[u8; 8] = b"BINDL001";
/// Alignment unit in bytes.
/// NOTE(review): presumably what stored data is padded to — confirm where it is used.
pub(crate) const BNDL_ALIGN: usize = 8;
/// In-memory size of one [`Entry`], in bytes, as reported by `size_of`.
pub(crate) const ENTRY_SIZE: usize = std::mem::size_of::<Entry>();
/// In-memory size of `entry::Footer`, in bytes, as reported by `size_of`.
pub(crate) const FOOTER_SIZE: usize = std::mem::size_of::<entry::Footer>();
/// Size of the file header in bytes. The tests below seek past this many bytes to reach
/// the first byte of stored data. Numerically equal to the length of `BNDL_MAGIC`.
pub(crate) const HEADER_SIZE: usize = 8;
/// NOTE(review): presumably the payload size (in bytes) that automatic compression keys
/// off — not referenced in the visible part of this file; confirm at the use site.
pub(crate) const AUTO_COMPRESS_THRESHOLD: usize = 2048;
/// Footer marker value; `0x62626262` is the four ASCII bytes `bbbb`.
pub(crate) const FOOTER_MAGIC: u32 = 0x62626262;
/// 64 zero bytes; `write_padding` writes slices of this buffer instead of allocating.
const ZEROS: &[u8; 64] = &[0u8; 64]; // Reusable zero buffer for padding

// Helper functions
/// Returns how much must be added to `n` to reach the next multiple of `SIZE`.
///
/// The result is always in `0..SIZE` and is zero when `n` is already a multiple of `SIZE`.
/// `SIZE` is a const generic, so the alignment is fixed at the call site; `T` is whichever
/// integer type the caller does its offset arithmetic in.
///
/// # Panics
///
/// Panics with a descriptive message if `SIZE` is zero, or if `SIZE` cannot be represented
/// in `T` (for example `pad::<256, u8>`). Both are programming errors at the call site.
pub(crate) fn pad<
    const SIZE: usize,
    T: Copy + TryFrom<usize> + std::ops::Sub<T, Output = T> + std::ops::Rem<T, Output = T>,
>(
    n: T,
) -> T
where
    <T as std::ops::Sub>::Output: std::ops::Rem<T>,
{
    // Without this guard a zero alignment would surface as an opaque remainder-by-zero panic.
    assert!(SIZE != 0, "pad: alignment must be non-zero");

    match T::try_from(SIZE) {
        // `size - n % size` lands in `1..=size`; the outer `%` folds an exact multiple
        // (which would otherwise yield `size`) back to 0.
        Ok(size) => (size - (n % size)) % size,
        Err(_) => panic!(
            "pad: alignment {} does not fit in the target integer type",
            SIZE
        ),
    }
}

/// Emits exactly `len` zero bytes to `writer`.
///
/// The bytes are taken from the shared `ZEROS` buffer, one buffer-sized slice at a time, so
/// no allocation happens regardless of how much padding is requested. The first write error
/// is handed back to the caller unchanged.
pub(crate) fn write_padding<W: Write>(writer: &mut W, len: usize) -> io::Result<()> {
    let block = ZEROS.len();

    // Whole buffers first...
    for _ in 0..len / block {
        writer.write_all(&ZEROS[..])?;
    }

    // ...then the remainder, which is always shorter than one buffer.
    let tail = len % block;
    if tail != 0 {
        writer.write_all(&ZEROS[..tail])?;
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::fs::OpenOptions;
    use std::io::{Seek, SeekFrom};

    /// Round-trips a single uncompressed entry: write it, drop the archive, reopen it and
    /// read the entry back.
    #[test]
    fn test_create_and_read() {
        let path = "test_basic.bindl";
        let data = b"Hello, Bindle World!";

        // Start from a clean slate so a file left behind by an earlier aborted run cannot
        // influence this one; most other tests in this module do the same.
        let _ = fs::remove_file(path);

        // 1. Create and Write
        {
            let mut fp = Bindle::open(path).expect("Failed to open");
            fp.add("hello.txt", data, Compress::None)
                .expect("Failed to add");
            fp.save().expect("Failed to commit");
        } // archive handle dropped here, before the file is opened again

        // 2. Open and Read
        {
            let fp = Bindle::open(path).expect("Failed to re-open");
            let result = fp.read("hello.txt").expect("File not found");
            assert_eq!(result.as_ref(), data);
        }

        // Best-effort cleanup; a failure to delete is not a test failure.
        fs::remove_file(path).ok();
    }

    /// Stores 1000 identical bytes with zstd, then checks the round-trip and that the
    /// archive on disk is smaller than the raw payload.
    #[test]
    fn test_zstd_compression() {
        let path = "test_zstd.bindl";
        // Highly compressible data
        let data = vec![b'A'; 1000];

        // The size assertion below only holds for a freshly created archive. A file left
        // over from an aborted run would be opened and written to again, and the extra
        // bytes could push it past the limit, so remove any leftover first.
        let _ = fs::remove_file(path);

        {
            let mut fp = Bindle::open(path).expect("Failed to open");
            fp.add("large.bin", &data, Compress::Zstd)
                .expect("Failed to add");
            fp.save().expect("Failed to commit");
        } // archive handle dropped here, before the file is opened again

        let fp = Bindle::open(path).expect("Failed to re-open");

        // Ensure data is correct
        let result = fp.read("large.bin").expect("File not found");
        assert_eq!(result, data);

        // Ensure the file on disk is actually smaller than the raw data (including headers)
        let meta = fs::metadata(path).unwrap();
        assert!(meta.len() < 1000);

        // Best-effort cleanup; a failure to delete is not a test failure.
        fs::remove_file(path).ok();
    }

    /// An entry written in one session must stay readable after a later session adds
    /// another entry to the same archive.
    #[test]
    fn test_append_functionality() {
        let archive_path = "test_append.bindl";
        fs::remove_file(archive_path).ok();

        // Session one: create the archive with a single zstd entry.
        {
            let mut first_session = Bindle::open(archive_path).expect("Fail open 1");
            first_session
                .add("1.txt", b"First", Compress::Zstd)
                .unwrap();
            first_session.save().expect("Fail commit 1");
        } // handle dropped here, before the archive is opened a second time

        // Session two: reopen (the archive is expected to already list "1.txt"),
        // add a second entry, save, then read both back through the same handle.
        {
            let mut second_session = Bindle::open(archive_path).expect("Fail open 2");

            second_session
                .add("2.txt", b"Second", Compress::None)
                .unwrap();
            second_session.save().expect("Fail commit 2");

            let old_entry = second_session
                .read("1.txt")
                .expect("Could not find 1.txt");
            let new_entry = second_session
                .read("2.txt")
                .expect("Could not find 2.txt");

            assert_eq!(old_entry.as_ref(), b"First");
            assert_eq!(new_entry.as_ref(), b"Second");
        }

        fs::remove_file(archive_path).ok();
    }

    /// A file whose contents are not a bindle archive must make `Bindle::open` fail.
    #[test]
    fn test_invalid_magic() {
        let bogus_path = "invalid.bindl";

        // Arbitrary bytes that do not form a valid archive.
        fs::write(bogus_path, b"NOT_A_PACK_FILE_AT_ALL").unwrap();

        let outcome = Bindle::open(bogus_path);
        assert!(outcome.is_err());

        // Best-effort cleanup.
        let _ = fs::remove_file(bogus_path);
    }

    /// Adding the same key twice must leave a single index entry that resolves to the
    /// most recently written payload.
    #[test]
    fn test_key_shadowing() {
        let archive_path = "test_shadow.bindl";
        fs::remove_file(archive_path).ok();

        let mut original = Bindle::open(archive_path).expect("Failed to open");

        // First version of the key, saved straight away.
        original
            .add("config.txt", b"v1", Compress::None)
            .unwrap();
        original.save().unwrap();

        // Second version under the same key, with a longer payload, saved again.
        original
            .add("config.txt", b"version_2_is_longer", Compress::None)
            .unwrap();
        original.save().unwrap();

        // A fresh handle on the same file must see only the newer payload...
        let reopened = Bindle::open(archive_path).expect("Failed to reopen");
        let latest = reopened.read("config.txt").unwrap();
        assert_eq!(latest.as_ref(), b"version_2_is_longer");

        // ...and still report exactly one entry.
        assert_eq!(reopened.len(), 1);

        let _ = fs::remove_file(archive_path);
    }

    /// Shadowing an entry grows the file; `vacuum` must shrink it again while keeping the
    /// newest payload readable.
    #[test]
    fn test_vacuum_reclaims_space() {
        let archive_path = "test_vacuum.bindl";
        fs::remove_file(archive_path).ok();

        // Current size of the archive file on disk.
        let on_disk = || fs::metadata(archive_path).unwrap().len();

        let mut archive = Bindle::open(archive_path).expect("Failed to open");

        // Store 1024 zero bytes and record the resulting file size.
        let big_payload = vec![0u8; 1024];
        archive
            .add("large.bin", &big_payload, Compress::None)
            .unwrap();
        archive.save().unwrap();
        let size_after_big = on_disk();

        // Replace it with a four-byte payload under the same key.
        archive.add("large.bin", b"tiny", Compress::None).unwrap();
        archive.save().unwrap();
        let size_after_shadow = on_disk();

        // The replacement is written in addition to the old data, so the file grows.
        assert!(size_after_shadow > size_after_big);

        // Compact the archive and measure again.
        archive.vacuum().expect("Vacuum failed");
        let size_after_vacuum = on_disk();

        // The file must be smaller than it was before compaction.
        assert!(size_after_vacuum < size_after_shadow);

        // A fresh handle must still find the small payload.
        let reopened = Bindle::open(archive_path).unwrap();
        assert_eq!(reopened.read("large.bin").unwrap().as_ref(), b"tiny");

        let _ = fs::remove_file(archive_path);
    }

    /// Packs a small directory tree into an archive, unpacks it into a different
    /// directory, and compares the unpacked files with what was written.
    #[test]
    fn test_directory_pack_unpack_roundtrip() {
        let archive_path = "roundtrip.bindl";
        let input_root = "test_src";
        let output_root = "test_out";

        // Remove anything a previous run may have left; any error here is ignored.
        fs::remove_dir_all(input_root).ok();
        fs::remove_dir_all(output_root).ok();
        fs::remove_file(archive_path).ok();

        // Fixture: one file at the top level and one inside a subdirectory.
        let nested_dir = format!("{}/subdir", input_root);
        fs::create_dir_all(nested_dir).unwrap();

        let top_file = format!("{}/file1.txt", input_root);
        fs::write(top_file, b"Hello World").unwrap();

        let nested_file = format!("{}/subdir/file2.txt", input_root);
        fs::write(nested_file, b"Compressed Data Content").unwrap();

        // Pack the tree with zstd and save; the handle is dropped before reopening.
        {
            let mut packer = Bindle::open(archive_path).unwrap();
            packer
                .pack(input_root, Compress::Zstd)
                .expect("Pack failed");
            packer.save().expect("Save failed");
        }

        // Unpack through a fresh handle into the output directory.
        {
            let unpacker = Bindle::open(archive_path).unwrap();
            unpacker.unpack(output_root).expect("Unpack failed");
        }

        // Both files must come back at the same relative paths with the same text.
        let unpacked_top = fs::read_to_string(format!("{}/file1.txt", output_root)).unwrap();
        let unpacked_nested =
            fs::read_to_string(format!("{}/subdir/file2.txt", output_root)).unwrap();

        assert_eq!(unpacked_top, "Hello World");
        assert_eq!(unpacked_nested, "Compressed Data Content");

        // Best-effort cleanup.
        let _ = fs::remove_dir_all(input_root);
        let _ = fs::remove_dir_all(output_root);
        let _ = fs::remove_file(archive_path);
    }

    /// Writes one entry through the streaming writer in three chunks and checks that
    /// reading it back yields the chunks concatenated in order.
    #[test]
    fn test_streaming_manual_chunks() {
        let archive_path = "test_stream.bindl";
        fs::remove_file(archive_path).ok();

        let pieces: [&[u8]; 3] = [b"Hello ", b"Streaming ", b"World!"];
        let expected = b"Hello Streaming World!";

        {
            let mut archive = Bindle::open(archive_path).expect("Failed to open");

            // Open an uncompressed stream for the entry.
            let mut stream = archive
                .writer("streamed_file.txt", Compress::None)
                .expect("Failed to start stream");

            // Feed the pieces one by one, in order.
            for &piece in pieces.iter() {
                stream.write_chunk(piece).unwrap();
            }

            // The stream is closed before the archive itself is saved.
            stream.close().expect("Failed to finish stream");
            archive.save().expect("Failed to save");
        }

        // Read the entry back through a fresh handle.
        let reopened = Bindle::open(archive_path).expect("Failed to reopen");
        let streamed = reopened
            .read("streamed_file.txt")
            .expect("Entry not found");
        assert_eq!(streamed.as_ref(), expected);
        assert_eq!(streamed.len(), expected.len());

        fs::remove_file(archive_path).ok();
    }

    /// Flips the first stored data byte directly in the file and checks that a subsequent
    /// `read` of the entry returns `None` instead of the damaged payload.
    #[test]
    fn test_crc32_corruption_detection() {
        let path = "test_crc32.bindl";
        let _ = std::fs::remove_file(path);
        let data = b"Test data for CRC32 verification";

        // 1. Create a file with valid data
        {
            let mut b = Bindle::open(path).expect("Failed to open");
            b.add("test.txt", data, Compress::None).unwrap();
            b.save().unwrap();
        }

        // 2. Verify that reading with correct data works
        {
            let b = Bindle::open(path).expect("Failed to reopen");
            let result = b.read("test.txt").expect("Should read successfully");
            assert_eq!(result.as_ref(), data);
        }

        // 3. Corrupt the data by modifying a byte directly in the file
        {
            let mut file = OpenOptions::new()
                .write(true)
                .read(true)
                .open(path)
                .unwrap();

            // Skip the header and modify the first byte of data
            file.seek(SeekFrom::Start(HEADER_SIZE as u64)).unwrap();
            // `write_all` rather than `write`: `write` may report that zero bytes were
            // written, which would leave the file intact and make step 4 fail for the
            // wrong reason.
            file.write_all(&[b'X']).unwrap(); // Corrupt first byte ('T' becomes 'X')
            file.flush().unwrap();
        }

        // 4. Verify that reading corrupted data fails CRC32 check
        {
            let b = Bindle::open(path).expect("Failed to reopen after corruption");
            let result = b.read("test.txt");
            assert!(result.is_none(), "Read should fail due to CRC32 mismatch");
        }

        let _ = std::fs::remove_file(path);
    }

    /// Stores a zstd entry and reads it back twice: once with `read`, and once through
    /// the streaming reader followed by an explicit CRC32 verification.
    #[test]
    fn test_crc32_with_compression() {
        let archive_path = "test_crc32_compressed.bindl";
        std::fs::remove_file(archive_path).ok();

        // 2000 identical bytes, stored with zstd requested explicitly.
        let payload = vec![b'A'; 2000];

        // Write the entry and save; the handle is dropped at the end of the scope.
        {
            let mut archive = Bindle::open(archive_path).expect("Failed to open");
            archive
                .add("compressed.bin", &payload, Compress::Zstd)
                .unwrap();
            archive.save().unwrap();
        }

        // Whole-entry read through a fresh handle.
        {
            let archive = Bindle::open(archive_path).expect("Failed to reopen");
            let whole = archive
                .read("compressed.bin")
                .expect("Should read successfully");
            assert_eq!(whole.as_ref(), payload.as_slice());
        }

        // Streaming read: drain the reader first, then ask it to verify the checksum.
        {
            let archive = Bindle::open(archive_path).expect("Failed to reopen");
            let mut stream = archive.reader("compressed.bin").unwrap();
            let mut collected = Vec::new();
            std::io::copy(&mut stream, &mut collected).unwrap();
            stream.verify_crc32().expect("CRC32 should match");
            assert_eq!(collected, payload);
        }

        std::fs::remove_file(archive_path).ok();
    }

    /// Removing an entry must take effect immediately on the open handle and must still
    /// hold after a save and reopen, without disturbing the remaining entries.
    #[test]
    fn test_remove_entry() {
        let archive_path = "test_remove.bindl";
        fs::remove_file(archive_path).ok();

        let mut archive = Bindle::open(archive_path).expect("Failed to open");

        // Three small uncompressed entries, saved together.
        for &(name, content) in &[
            ("file1.txt", b"Content 1"),
            ("file2.txt", b"Content 2"),
            ("file3.txt", b"Content 3"),
        ] {
            archive.add(name, content, Compress::None).unwrap();
        }
        archive.save().unwrap();

        assert_eq!(archive.len(), 3);
        assert!(archive.exists("file2.txt"));

        // Drop the middle entry: `remove` reports success and the count goes down.
        assert!(archive.remove("file2.txt"));
        assert_eq!(archive.len(), 2);
        assert!(!archive.exists("file2.txt"));

        // Removing a key that was never added reports failure.
        assert!(!archive.remove("nonexistent.txt"));

        // Persist the removal and inspect the archive through a second handle.
        archive.save().unwrap();
        let reopened = Bindle::open(archive_path).unwrap();
        assert_eq!(reopened.len(), 2);
        assert!(reopened.exists("file1.txt"));
        assert!(!reopened.exists("file2.txt"));
        assert!(reopened.exists("file3.txt"));

        // The surviving entries must still carry their original contents.
        assert_eq!(reopened.read("file1.txt").unwrap().as_ref(), b"Content 1");
        assert_eq!(reopened.read("file3.txt").unwrap().as_ref(), b"Content 3");

        let _ = fs::remove_file(archive_path);
    }
}