//! bindle.rs — `Bindle` archive: an on-disk container with a magic header,
//! 8-byte-aligned entry data, a serialized index, and a trailing footer.
1use crc32fast::Hasher;
2use memmap2::Mmap;
3use std::borrow::Cow;
4use std::collections::BTreeMap;
5use std::fs::{File, OpenOptions};
6use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
7use std::path::{Path, PathBuf};
8use zerocopy::{FromBytes, IntoBytes};
9
10use crate::compress::Compress;
11use crate::entry::{Entry, Footer};
12use crate::reader::{Either, Reader};
13use crate::writer::Writer;
14use crate::{
15    AUTO_COMPRESS_THRESHOLD, BNDL_ALIGN, BNDL_MAGIC, ENTRY_SIZE, FOOTER_MAGIC, FOOTER_SIZE,
16    HEADER_SIZE, pad, write_padding,
17};
18
/// A binary archive for collecting files.
///
/// Uses memory-mapped I/O for fast reads, supports optional zstd compression, and handles updates via shadowing.
/// Files can be added incrementally without rewriting the entire archive.
///
/// # Example
///
/// ```no_run
/// use bindle_file::{Bindle, Compress};
///
/// let mut archive = Bindle::open("data.bndl")?;
/// archive.add("file.txt", b"data", Compress::None)?;
/// archive.save()?;
/// # Ok::<(), std::io::Error>(())
/// ```
pub struct Bindle {
    // Filesystem location of the archive; kept so `vacuum()` can rename a
    // rebuilt temp file over it.
    pub(crate) path: PathBuf,
    // Open handle; holds a shared advisory lock while idle (released in Drop).
    pub(crate) file: File,
    // Read-only map of the file contents; `None` for a brand-new file until
    // the first `save()` creates something worth mapping.
    pub(crate) mmap: Option<Mmap>,
    // Live entries keyed by name. Removed/shadowed data stays on disk until
    // `vacuum()` compacts the file.
    pub(crate) index: BTreeMap<String, Entry>,
    // Byte offset where entry data ends and the serialized index begins.
    pub(crate) data_end: u64,
}
41
42impl Bindle {
43    /// Creates a new archive, overwriting any existing file at the path.
44    pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> {
45        let path_buf = path.as_ref().to_path_buf();
46        let opts = OpenOptions::new()
47            .truncate(true)
48            .read(true)
49            .write(true)
50            .create(true)
51            .to_owned();
52        Self::new(path_buf, opts)
53    }
54
55    /// Opens an existing archive or creates a new one if it doesn't exist.
56    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
57        let path_buf = path.as_ref().to_path_buf();
58        let opts = OpenOptions::new()
59            .read(true)
60            .write(true)
61            .create(true)
62            .to_owned();
63        Self::new(path_buf, opts)
64    }
65
66    /// Opens an existing archive. Returns an error if the file doesn't exist.
67    pub fn load<P: AsRef<Path>>(path: P) -> io::Result<Self> {
68        let path_buf = path.as_ref().to_path_buf();
69        let opts = OpenOptions::new().read(true).write(true).to_owned();
70        Self::new(path_buf, opts)
71    }
72
    /// Create a new `Bindle` from a path and file, the path must match the file
    ///
    /// Opens the file with `opts`, takes a shared advisory lock, and — for a
    /// non-empty file — validates the magic header, maps the file, and walks
    /// the serialized index described by the footer. Truncated or corrupted
    /// index records stop the walk early rather than erroring out.
    pub fn new(path: PathBuf, opts: OpenOptions) -> io::Result<Self> {
        let mut file = opts.open(&path)?;
        file.lock_shared()?;
        let len = file.metadata()?.len();

        // Handle completely new/empty files
        if len == 0 {
            // Stamp the magic header now; index and footer are written by `save()`.
            file.write_all(BNDL_MAGIC)?;
            return Ok(Self {
                path,
                file,
                mmap: None, // nothing worth mapping yet
                index: BTreeMap::new(),
                data_end: HEADER_SIZE as u64,
            });
        }

        // Safety check: File must be at least HEADER + FOOTER size (24 bytes)
        // This prevents "attempt to subtract with overflow" when calculating footer_pos
        if len < (HEADER_SIZE + FOOTER_SIZE) as u64 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "File too small to be a valid bindle",
            ));
        }

        let mut header = [0u8; 8];
        file.read_exact(&mut header)?;
        if &header != BNDL_MAGIC {
            return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid header"));
        }

        // SAFETY-NOTE(review): mapping a file we hold open under a *shared*
        // advisory lock; only cooperating processes are prevented from
        // truncating it while the map is live.
        let m = unsafe { Mmap::map(&file)? };

        // Calculate footer position. Subtraction is now safe due to the check above.
        let footer_pos = m.len() - FOOTER_SIZE;
        let footer = Footer::read_from_bytes(&m[footer_pos..])
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer"))?;

        if footer.magic() != FOOTER_MAGIC {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Invalid footer, the file may be corrupt",
            ));
        }

        let data_end = footer.index_offset();
        let count = footer.entry_count();
        let mut index = BTreeMap::new();

        // Walk the serialized index records: [Entry | name bytes | padding],
        // each record padded out to a BNDL_ALIGN boundary.
        let mut cursor = data_end as usize;
        for _ in 0..count {
            // Ensure there is enough data left for an Entry header
            if cursor + ENTRY_SIZE > footer_pos {
                break;
            }

            let entry = match Entry::read_from_bytes(&m[cursor..cursor + ENTRY_SIZE]) {
                Ok(e) => e,
                Err(_) => break, // Corrupted entry, stop reading
            };
            let n_start = cursor + ENTRY_SIZE;

            // Validate that the filename exists within the mapped bounds
            if n_start + entry.name_len() > footer_pos {
                break;
            }

            // Lossy conversion: invalid UTF-8 bytes in a stored name are
            // replaced rather than rejected.
            let name =
                String::from_utf8_lossy(&m[n_start..n_start + entry.name_len()]).into_owned();
            index.insert(name, entry);

            // Round the record length up to the next BNDL_ALIGN boundary.
            let total = ENTRY_SIZE + entry.name_len();
            cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1);
        }

        Ok(Self {
            path,
            file,
            mmap: Some(m),
            index,
            data_end,
        })
    }
158
159    fn should_auto_compress(&self, compress: Compress, len: usize) -> bool {
160        compress == Compress::Zstd || (compress == Compress::Auto && len > AUTO_COMPRESS_THRESHOLD)
161    }
162
163    /// Adds data to the archive with the given name.
164    ///
165    /// If an entry with the same name exists, it will be shadowed. Call [`save()`](Bindle::save) to commit changes.
166    pub fn add(&mut self, name: &str, data: &[u8], compress: Compress) -> io::Result<()> {
167        let mut stream = self.writer(name, compress)?;
168        stream.write_all(data)?;
169        stream.close()?;
170        Ok(())
171    }
172
173    /// Adds a file from the filesystem to the archive.
174    ///
175    /// Reads the file at `path` and stores it with the given `name`. Call [`save()`](Bindle::save) to commit changes.
176    pub fn add_file(
177        &mut self,
178        name: &str,
179        path: impl AsRef<Path>,
180        compress: Compress,
181    ) -> io::Result<()> {
182        let mut stream = self.writer(name, compress)?;
183        let mut src = std::fs::File::open(path)?;
184        std::io::copy(&mut src, &mut stream)?;
185        Ok(())
186    }
187
    /// Commits all pending changes by writing the index and footer to disk.
    ///
    /// Must be called after add/remove operations to make changes persistent.
    pub fn save(&mut self) -> io::Result<()> {
        // Take the exclusive lock for the duration of the index rewrite.
        self.file.lock()?;
        self.file.seek(SeekFrom::Start(self.data_end))?;
        let index_start = self.data_end;

        // Use buffered writer to batch index writes
        {
            let mut writer = BufWriter::new(&mut self.file);
            for (name, entry) in &self.index {
                writer.write_all(entry.as_bytes())?;
                writer.write_all(name.as_bytes())?;
                // Pad each (entry, name) record to BNDL_ALIGN so the cursor
                // arithmetic in `new()` lands on record boundaries.
                let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len());
                if pad > 0 {
                    write_padding(&mut writer, pad)?;
                }
            }

            let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC);
            writer.write_all(footer.as_bytes())?;
            writer.flush()?;
        } // Drop writer here to release borrow

        // Truncate file to current position to remove any old data
        let current_pos = self.file.stream_position()?;
        self.file.set_len(current_pos)?;

        // Remap so reads observe the freshly written index and footer.
        // SAFETY-NOTE(review): same mapping caveat as in `new()`.
        let mmap = unsafe { Mmap::map(&self.file)? };
        self.mmap = Some(mmap);
        // Drop back to a shared lock for normal read access.
        self.file.lock_shared()?;
        Ok(())
    }
222
    /// Reclaims space by removing shadowed data.
    ///
    /// Rebuilds the archive with only live entries, removing old versions of updated files.
    pub fn vacuum(&mut self) -> io::Result<()> {
        let temp_path = self.path.with_extension("tmp");

        // Create temp file and keep handle to reuse after rename
        let mut temp_file = OpenOptions::new()
            .write(true)
            .read(true)
            .create(true)
            .truncate(true)
            .open(&temp_path)?;

        temp_file.lock()?;
        temp_file.write_all(BNDL_MAGIC)?;
        let mut current_offset = HEADER_SIZE as u64;

        // Copy only live entries from original to temp
        for entry in self.index.values_mut() {
            self.file.seek(SeekFrom::Start(entry.offset()))?;
            temp_file.seek(SeekFrom::Start(current_offset))?;

            // Stream data without allocating full buffer
            let mut limited = (&mut self.file).take(entry.compressed_size());
            io::copy(&mut limited, &mut temp_file)?;

            // Point the in-memory entry at its new, compacted location.
            entry.set_offset(current_offset);
            // Keep 8-byte alignment between consecutive data blobs.
            let pad = pad::<8, u64>(entry.compressed_size());
            if pad > 0 {
                write_padding(&mut temp_file, pad as usize)?;
            }
            current_offset += entry.compressed_size() + pad;
        }

        // Write the index and footer
        let index_start = current_offset;
        for (name, entry) in &self.index {
            temp_file.write_all(entry.as_bytes())?;
            temp_file.write_all(name.as_bytes())?;
            let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len());
            if pad > 0 {
                write_padding(&mut temp_file, pad)?;
            }
        }

        let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC);
        temp_file.write_all(footer.as_bytes())?;
        temp_file.sync_all()?;

        // Acquire exclusive lock just before rename to prevent concurrent access
        self.file.lock()?;

        // Release locks and close current file
        drop(self.mmap.take());
        let _ = self.file.unlock();

        // Atomically replace original with temp
        // NOTE(review): rename-over-open-handle semantics differ on Windows;
        // this looks Unix-oriented — confirm if Windows is a supported target.
        std::fs::rename(&temp_path, &self.path)?;

        // Reuse temp_file handle (still valid after rename)
        temp_file.lock_shared()?;
        // SAFETY-NOTE(review): mapping the file we just wrote and still hold open.
        let mmap = unsafe { Mmap::map(&temp_file)? };

        // Re-read the footer from the rebuilt file to refresh `data_end`.
        let footer_pos = mmap.len() - FOOTER_SIZE;
        let footer = Footer::read_from_bytes(&mmap[footer_pos..]).map_err(|_| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "Failed to read footer after vacuum",
            )
        })?;

        self.file = temp_file;
        self.mmap = Some(mmap);
        self.data_end = footer.index_offset();

        Ok(())
    }
301
302    /// Reads an entry from the archive, decompressing if needed.
303    ///
304    /// Returns `None` if the entry doesn't exist or if CRC32 verification fails.
305    pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> {
306        let entry = self.index.get(name)?;
307        let mmap = self.mmap.as_ref()?;
308
309        let data = if entry.compression_type() == Compress::Zstd {
310            let compressed_data = mmap.get(
311                entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize,
312            )?;
313            let mut out = Vec::with_capacity(entry.uncompressed_size() as usize);
314            zstd::Decoder::new(compressed_data)
315                .ok()?
316                .read_to_end(&mut out)
317                .ok()?;
318            Cow::Owned(out)
319        } else {
320            let uncompressed_data = mmap.get(
321                entry.offset() as usize..(entry.offset() + entry.uncompressed_size()) as usize,
322            )?;
323            Cow::Borrowed(uncompressed_data)
324        };
325
326        // Verify CRC32
327        let computed_crc = crc32fast::hash(&data);
328        if computed_crc != entry.crc32() {
329            return None;
330        }
331
332        Some(data)
333    }
334
335    /// Reads an entry into a provided buffer, avoiding allocation.
336    ///
337    /// Decompresses if needed and verifies CRC32. Returns the number of bytes read.
338    /// If the buffer is too small, only reads up to buffer.len() bytes.
339    ///
340    /// # Example
341    ///
342    /// ```no_run
343    /// use bindle_file::Bindle;
344    ///
345    /// let archive = Bindle::open("data.bndl")?;
346    /// let mut buffer = vec![0u8; 1024];
347    /// let bytes_read = archive.read_into("file.txt", &mut buffer)?;
348    /// # Ok::<(), std::io::Error>(())
349    /// ```
350    pub fn read_into(&self, name: &str, buffer: &mut [u8]) -> io::Result<usize> {
351        let mut reader = self.reader(name)?;
352        let bytes_read = reader.read(buffer)?;
353        reader.verify_crc32()?;
354        Ok(bytes_read)
355    }
356
357    /// Reads an entry and writes it to the given writer.
358    ///
359    /// Returns the number of bytes written. Verifies CRC32 after reading.
360    pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> {
361        let mut reader = self.reader(name)?;
362        let bytes_copied = std::io::copy(&mut reader, &mut w)?;
363        reader.verify_crc32()?;
364        Ok(bytes_copied)
365    }
366
367    /// Returns a streaming reader for an entry.
368    ///
369    /// Automatically decompresses if the entry is compressed. Call [`Reader::verify_crc32()`] after reading to verify integrity.
370    pub fn reader<'a>(&'a self, name: &str) -> io::Result<Reader<'a>> {
371        let entry = self
372            .index
373            .get(name)
374            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?;
375
376        let start = entry.offset() as usize;
377        let end = start + entry.compressed_size() as usize;
378        let mmap = self
379            .mmap
380            .as_ref()
381            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing mmap"))?;
382        let data_slice = &mmap[start..end];
383
384        let cursor = io::Cursor::new(data_slice);
385
386        if entry.compression_type() == Compress::Zstd {
387            // Zstd streaming decoder
388            let decoder = zstd::Decoder::new(cursor)?;
389            Ok(Reader {
390                decoder: Either::Left(decoder),
391                crc32_hasher: Hasher::new(),
392                expected_crc32: entry.crc32(),
393            })
394        } else {
395            Ok(Reader {
396                decoder: Either::Right(cursor),
397                crc32_hasher: Hasher::new(),
398                expected_crc32: entry.crc32(),
399            })
400        }
401    }
402
    /// Returns the number of entries in the archive.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Returns true if the archive contains no entries.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Returns a reference to the archive index.
    ///
    /// The index maps entry names to their metadata.
    pub fn index(&self) -> &BTreeMap<String, Entry> {
        &self.index
    }

    /// Removes all entries from the index.
    ///
    /// Call [`save()`](Bindle::save) to commit. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called.
    pub fn clear(&mut self) {
        // Only the in-memory index changes; on-disk bytes are reclaimed by vacuum().
        self.index.clear()
    }

    /// Returns true if an entry with the given name exists.
    pub fn exists(&self, name: &str) -> bool {
        self.index.contains_key(name)
    }

    /// Removes an entry from the index.
    ///
    /// Returns true if the entry existed. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called.
    pub fn remove(&mut self, name: &str) -> bool {
        self.index.remove(name).is_some()
    }
438
439    /// Recursively adds all files from a directory to the archive.
440    ///
441    /// File paths are stored relative to the source directory. Call [`save()`](Bindle::save) to commit.
442    pub fn pack<P: AsRef<Path>>(&mut self, src_dir: P, compress: Compress) -> io::Result<()> {
443        self.pack_recursive(src_dir.as_ref(), src_dir.as_ref(), compress)
444    }
445
446    fn pack_recursive(
447        &mut self,
448        base: &Path,
449        current: &Path,
450        compress: Compress,
451    ) -> io::Result<()> {
452        if current.is_dir() {
453            for entry in std::fs::read_dir(current)? {
454                self.pack_recursive(base, &entry?.path(), compress)?;
455            }
456        } else {
457            let name = current
458                .strip_prefix(base)
459                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
460                .to_str()
461                .unwrap_or_default();
462            self.add_file(&name, current, compress)?;
463        }
464        Ok(())
465    }
466
    /// Extracts all entries to a destination directory.
    ///
    /// Creates subdirectories as needed to match the stored paths.
    ///
    /// NOTE(review): entry names are joined onto `dest` unchecked, so a name
    /// containing `..` or an absolute path could escape the destination
    /// directory. If archives can come from untrusted sources, sanitize names
    /// before joining — confirm the threat model.
    pub fn unpack<P: AsRef<Path>>(&self, dest: P) -> io::Result<()> {
        let dest_path = dest.as_ref();
        std::fs::create_dir_all(dest_path)?;

        // Collect all unique parent directories
        let mut dirs = std::collections::HashSet::new();
        for (name, _) in &self.index {
            if let Some(parent) = Path::new(name).parent() {
                // Only add non-empty parent paths
                if parent != Path::new("") {
                    dirs.insert(dest_path.join(parent));
                }
            }
        }

        // Create all directories upfront (sorted for parent-first order)
        if !dirs.is_empty() {
            let mut dirs: Vec<_> = dirs.into_iter().collect();
            dirs.sort();
            for dir in dirs {
                std::fs::create_dir_all(&dir)?;
            }
        }

        // Sort entries by physical offset for sequential reads (better cache locality)
        let mut entries: Vec<_> = self.index.iter().collect();
        entries.sort_by_key(|(_, entry)| entry.offset());

        // Extract files without per-file directory checks
        for (name, _) in entries {
            let file_path = dest_path.join(name);
            let mut reader = self.reader(name)?;
            let mut file = File::create(&file_path)?;
            io::copy(&mut reader, &mut file)?;
            // Fail the whole unpack on the first corrupted entry.
            reader.verify_crc32()?;
        }
        Ok(())
    }
508
    /// Creates a streaming writer for adding an entry.
    ///
    /// The writer must be closed and then [`save()`](Bindle::save) must be called to commit the entry.
    ///
    /// Takes the exclusive file lock; `save()` later downgrades it to shared.
    pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> {
        self.file.lock()?;
        // Only seek if not already at the correct position
        let current_pos = self.file.stream_position()?;
        if current_pos != self.data_end {
            self.file.seek(SeekFrom::Start(self.data_end))?;
        }
        // NOTE(review): the data length is unknown when streaming, so `len` is
        // passed as 0 and `Compress::Auto` can never cross the threshold here —
        // only an explicit `Compress::Zstd` compresses streamed writes (and
        // therefore `add()`, which delegates here). Confirm this is intended.
        let compress = self.should_auto_compress(compress, 0);
        let start_offset = self.data_end;
        let encoder = if compress {
            // The encoder gets its own handle so the Writer keeps separate
            // position bookkeeping on `self.file`.
            let f = self.file.try_clone()?;
            Some(zstd::Encoder::new(f, 3)?)
        } else {
            None
        };
        Ok(Writer {
            name: name.to_string(),
            bindle: self,
            encoder,
            start_offset,
            uncompressed_size: 0,
            crc32_hasher: Hasher::new(),
        })
    }
536}
537
impl Drop for Bindle {
    fn drop(&mut self) {
        // Best-effort release of the advisory file lock; errors cannot be
        // propagated out of `drop`, so they are deliberately ignored.
        let _ = self.file.unlock();
    }
}