// bindle_file/bindle.rs
1use crc32fast::Hasher;
2use fs2::FileExt;
3use memmap2::Mmap;
4use std::borrow::Cow;
5use std::collections::BTreeMap;
6use std::fs::{File, OpenOptions};
7use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
8use std::path::{Path, PathBuf};
9use zerocopy::{FromBytes, IntoBytes};
10
11use crate::compress::Compress;
12use crate::entry::{Entry, Footer};
13use crate::reader::{Either, Reader};
14use crate::writer::Writer;
15use crate::{
16    AUTO_COMPRESS_THRESHOLD, BNDL_ALIGN, BNDL_MAGIC, ENTRY_SIZE, FOOTER_MAGIC, FOOTER_SIZE,
17    HEADER_SIZE, pad, write_padding,
18};
19
/// A binary archive for collecting files.
///
/// Uses memory-mapped I/O for fast reads, supports optional zstd compression, and handles updates via shadowing.
/// Files can be added incrementally without rewriting the entire archive.
///
/// # Example
///
/// ```no_run
/// use bindle_file::{Bindle, Compress};
///
/// let mut archive = Bindle::open("data.bndl")?;
/// archive.add("file.txt", b"data", Compress::None)?;
/// archive.save()?;
/// # Ok::<(), std::io::Error>(())
/// ```
pub struct Bindle {
    /// Path of the archive on disk; kept so `vacuum` can rename a temp file over it.
    pub(crate) path: PathBuf,
    /// Open handle to the archive, held under an fs2 advisory lock for the struct's lifetime.
    pub(crate) file: File,
    /// Read-only memory map of the file; `None` for a brand-new empty archive until the first save.
    pub(crate) mmap: Option<Mmap>,
    /// Live entries keyed by name; shadowed (superseded) versions are not listed here.
    pub(crate) index: BTreeMap<String, Entry>,
    /// File offset where entry data ends and the index region begins.
    pub(crate) data_end: u64,
}
42
43impl Bindle {
44    /// Creates a new archive, overwriting any existing file at the path.
45    pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> {
46        let path_buf = path.as_ref().to_path_buf();
47        let opts = OpenOptions::new()
48            .truncate(true)
49            .read(true)
50            .write(true)
51            .create(true)
52            .to_owned();
53        Self::new(path_buf, opts)
54    }
55
56    /// Opens an existing archive or creates a new one if it doesn't exist.
57    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
58        let path_buf = path.as_ref().to_path_buf();
59        let opts = OpenOptions::new()
60            .read(true)
61            .write(true)
62            .create(true)
63            .to_owned();
64        Self::new(path_buf, opts)
65    }
66
67    /// Opens an existing archive. Returns an error if the file doesn't exist.
68    pub fn load<P: AsRef<Path>>(path: P) -> io::Result<Self> {
69        let path_buf = path.as_ref().to_path_buf();
70        let opts = OpenOptions::new().read(true).write(true).to_owned();
71        Self::new(path_buf, opts)
72    }
73
74    /// Create a new `Bindle` from a path and file, the path must match the file
75    pub fn new(path: PathBuf, opts: OpenOptions) -> io::Result<Self> {
76        let mut file = opts.open(&path)?;
77        file.lock_shared()?;
78        let len = file.metadata()?.len();
79
80        // Handle completely new/empty files
81        if len == 0 {
82            file.write_all(BNDL_MAGIC)?;
83            return Ok(Self {
84                path,
85                file,
86                mmap: None,
87                index: BTreeMap::new(),
88                data_end: HEADER_SIZE as u64,
89            });
90        }
91
92        // Safety check: File must be at least HEADER + FOOTER size (24 bytes)
93        // This prevents "attempt to subtract with overflow" when calculating footer_pos
94        if len < (HEADER_SIZE + FOOTER_SIZE) as u64 {
95            return Err(io::Error::new(
96                io::ErrorKind::InvalidData,
97                "File too small to be a valid bindle",
98            ));
99        }
100
101        let mut header = [0u8; 8];
102        file.read_exact(&mut header)?;
103        if &header != BNDL_MAGIC {
104            return Err(io::Error::new(io::ErrorKind::InvalidData, "Invalid header"));
105        }
106
107        let m = unsafe { Mmap::map(&file)? };
108
109        // Calculate footer position. Subtraction is now safe due to the check above.
110        let footer_pos = m.len() - FOOTER_SIZE;
111        let footer = Footer::read_from_bytes(&m[footer_pos..])
112            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Failed to read footer"))?;
113
114        if footer.magic() != FOOTER_MAGIC {
115            return Err(io::Error::new(
116                io::ErrorKind::InvalidData,
117                "Invalid footer, the file may be corrupt",
118            ));
119        }
120
121        let data_end = footer.index_offset();
122        let count = footer.entry_count();
123        let mut index = BTreeMap::new();
124
125        let mut cursor = data_end as usize;
126        for _ in 0..count {
127            // Ensure there is enough data left for an Entry header
128            if cursor + ENTRY_SIZE > footer_pos {
129                break;
130            }
131
132            let entry = match Entry::read_from_bytes(&m[cursor..cursor + ENTRY_SIZE]) {
133                Ok(e) => e,
134                Err(_) => break, // Corrupted entry, stop reading
135            };
136            let n_start = cursor + ENTRY_SIZE;
137
138            // Validate that the filename exists within the mapped bounds
139            if n_start + entry.name_len() > footer_pos {
140                break;
141            }
142
143            let name =
144                String::from_utf8_lossy(&m[n_start..n_start + entry.name_len()]).into_owned();
145            index.insert(name, entry);
146
147            let total = ENTRY_SIZE + entry.name_len();
148            cursor += (total + (BNDL_ALIGN - 1)) & !(BNDL_ALIGN - 1);
149        }
150
151        Ok(Self {
152            path,
153            file,
154            mmap: Some(m),
155            index,
156            data_end,
157        })
158    }
159
160    fn should_auto_compress(&self, compress: Compress, len: usize) -> bool {
161        compress == Compress::Zstd || (compress == Compress::Auto && len > AUTO_COMPRESS_THRESHOLD)
162    }
163
164    /// Adds data to the archive with the given name.
165    ///
166    /// If an entry with the same name exists, it will be shadowed. Call [`save()`](Bindle::save) to commit changes.
167    pub fn add(&mut self, name: &str, data: &[u8], compress: Compress) -> io::Result<()> {
168        let mut stream = self.writer(name, compress)?;
169        stream.write_all(data)?;
170        stream.close()?;
171        Ok(())
172    }
173
174    /// Adds a file from the filesystem to the archive.
175    ///
176    /// Reads the file at `path` and stores it with the given `name`. Call [`save()`](Bindle::save) to commit changes.
177    pub fn add_file(
178        &mut self,
179        name: &str,
180        path: impl AsRef<Path>,
181        compress: Compress,
182    ) -> io::Result<()> {
183        let mut stream = self.writer(name, compress)?;
184        let mut src = std::fs::File::open(path)?;
185        std::io::copy(&mut src, &mut stream)?;
186        Ok(())
187    }
188
    /// Commits all pending changes by writing the index and footer to disk.
    ///
    /// Must be called after add/remove operations to make changes persistent.
    ///
    /// Takes the advisory lock exclusively for the duration of the write,
    /// then drops back to a shared lock once the new mapping is in place.
    pub fn save(&mut self) -> io::Result<()> {
        self.file.lock_exclusive()?;
        // The index is appended immediately after the live data region.
        self.file.seek(SeekFrom::Start(self.data_end))?;
        let index_start = self.data_end;

        // Use buffered writer to batch index writes
        {
            let mut writer = BufWriter::new(&mut self.file);
            for (name, entry) in &self.index {
                // Record layout: fixed-size Entry, then the name bytes,
                // padded out to the next BNDL_ALIGN boundary.
                writer.write_all(entry.as_bytes())?;
                writer.write_all(name.as_bytes())?;
                let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len());
                if pad > 0 {
                    write_padding(&mut writer, pad)?;
                }
            }

            // The footer records where the index starts and how many entries it holds.
            let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC);
            writer.write_all(footer.as_bytes())?;
            writer.flush()?;
        } // Drop writer here to release borrow

        // Truncate file to current position to remove any old data
        // (e.g. a previous, longer index that followed the data region).
        let current_pos = self.file.stream_position()?;
        self.file.set_len(current_pos)?;

        // Re-map the file so readers see the freshly written index/footer.
        let mmap = unsafe { Mmap::map(&self.file)? };
        self.mmap = Some(mmap);
        self.file.lock_shared()?;
        Ok(())
    }
223
    /// Reclaims space by removing shadowed data.
    ///
    /// Rebuilds the archive with only live entries, removing old versions of updated files.
    ///
    /// Strategy: stream live data into a sibling temp file, append a fresh
    /// index and footer, sync, then atomically rename the temp file over the
    /// original and adopt the temp handle as the new archive handle.
    pub fn vacuum(&mut self) -> io::Result<()> {
        let temp_path = self.path.with_extension("tmp");

        // Create temp file and keep handle to reuse after rename
        let mut temp_file = OpenOptions::new()
            .write(true)
            .read(true)
            .create(true)
            .truncate(true)
            .open(&temp_path)?;

        temp_file.lock_exclusive()?;
        temp_file.write_all(BNDL_MAGIC)?;
        let mut current_offset = HEADER_SIZE as u64;

        // Copy only live entries from original to temp
        for entry in self.index.values_mut() {
            self.file.seek(SeekFrom::Start(entry.offset()))?;
            temp_file.seek(SeekFrom::Start(current_offset))?;

            // Stream data without allocating full buffer
            let mut limited = (&mut self.file).take(entry.compressed_size());
            io::copy(&mut limited, &mut temp_file)?;

            // Rewrite the in-memory entry so it points into the new file.
            entry.set_offset(current_offset);
            let pad = pad::<8, u64>(entry.compressed_size());
            if pad > 0 {
                write_padding(&mut temp_file, pad as usize)?;
            }
            current_offset += entry.compressed_size() + pad;
        }

        // Write the index and footer
        let index_start = current_offset;
        for (name, entry) in &self.index {
            temp_file.write_all(entry.as_bytes())?;
            temp_file.write_all(name.as_bytes())?;
            let pad = pad::<BNDL_ALIGN, usize>(ENTRY_SIZE + name.len());
            if pad > 0 {
                write_padding(&mut temp_file, pad)?;
            }
        }

        let footer = Footer::new(index_start, self.index.len() as u32, FOOTER_MAGIC);
        temp_file.write_all(footer.as_bytes())?;
        // Durability point: everything must be on disk before the rename.
        temp_file.sync_all()?;

        // Acquire exclusive lock just before rename to prevent concurrent access
        self.file.lock_exclusive()?;

        // Release locks and close current file
        drop(self.mmap.take());
        let _ = self.file.unlock();

        // Atomically replace original with temp
        std::fs::rename(&temp_path, &self.path)?;

        // Reuse temp_file handle (still valid after rename)
        temp_file.lock_shared()?;
        let mmap = unsafe { Mmap::map(&temp_file)? };

        // Re-read the footer we just wrote to recover `data_end` for the new layout.
        let footer_pos = mmap.len() - FOOTER_SIZE;
        let footer = Footer::read_from_bytes(&mmap[footer_pos..]).map_err(|_| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "Failed to read footer after vacuum",
            )
        })?;

        self.file = temp_file;
        self.mmap = Some(mmap);
        self.data_end = footer.index_offset();

        Ok(())
    }
302
303    /// Reads an entry from the archive, decompressing if needed.
304    ///
305    /// Returns `None` if the entry doesn't exist or if CRC32 verification fails.
306    pub fn read<'a>(&'a self, name: &str) -> Option<Cow<'a, [u8]>> {
307        let entry = self.index.get(name)?;
308        let mmap = self.mmap.as_ref()?;
309
310        let data = if entry.compression_type() == Compress::Zstd {
311            let compressed_data = mmap.get(
312                entry.offset() as usize..(entry.offset() + entry.compressed_size()) as usize,
313            )?;
314            let mut out = Vec::with_capacity(entry.uncompressed_size() as usize);
315            zstd::Decoder::new(compressed_data)
316                .ok()?
317                .read_to_end(&mut out)
318                .ok()?;
319            Cow::Owned(out)
320        } else {
321            let uncompressed_data = mmap.get(
322                entry.offset() as usize..(entry.offset() + entry.uncompressed_size()) as usize,
323            )?;
324            Cow::Borrowed(uncompressed_data)
325        };
326
327        // Verify CRC32
328        let computed_crc = crc32fast::hash(&data);
329        if computed_crc != entry.crc32() {
330            return None;
331        }
332
333        Some(data)
334    }
335
336    /// Reads an entry and writes it to the given writer.
337    ///
338    /// Returns the number of bytes written. Verifies CRC32 after reading.
339    pub fn read_to<W: std::io::Write>(&self, name: &str, mut w: W) -> std::io::Result<u64> {
340        let mut reader = self.reader(name)?;
341        let bytes_copied = std::io::copy(&mut reader, &mut w)?;
342        reader.verify_crc32()?;
343        Ok(bytes_copied)
344    }
345
346    /// Returns a streaming reader for an entry.
347    ///
348    /// Automatically decompresses if the entry is compressed. Call [`Reader::verify_crc32()`] after reading to verify integrity.
349    pub fn reader<'a>(&'a self, name: &str) -> io::Result<Reader<'a>> {
350        let entry = self
351            .index
352            .get(name)
353            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Entry not found"))?;
354
355        let start = entry.offset() as usize;
356        let end = start + entry.compressed_size() as usize;
357        let mmap = self
358            .mmap
359            .as_ref()
360            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing mmap"))?;
361        let data_slice = &mmap[start..end];
362
363        let cursor = io::Cursor::new(data_slice);
364
365        if entry.compression_type() == Compress::Zstd {
366            // Zstd streaming decoder
367            let decoder = zstd::Decoder::new(cursor)?;
368            Ok(Reader {
369                decoder: Either::Left(decoder),
370                crc32_hasher: Hasher::new(),
371                expected_crc32: entry.crc32(),
372            })
373        } else {
374            Ok(Reader {
375                decoder: Either::Right(cursor),
376                crc32_hasher: Hasher::new(),
377                expected_crc32: entry.crc32(),
378            })
379        }
380    }
381
    /// Returns the number of entries in the archive.
    ///
    /// Counts only live (non-shadowed) entries in the in-memory index.
    pub fn len(&self) -> usize {
        self.index.len()
    }
386
387    /// Returns true if the archive contains no entries.
388    pub fn is_empty(&self) -> bool {
389        self.index.is_empty()
390    }
391
    /// Returns a reference to the archive index.
    ///
    /// The index maps entry names to their metadata. Only live entries appear;
    /// shadowed versions of updated files are not listed.
    pub fn index(&self) -> &BTreeMap<String, Entry> {
        &self.index
    }
398
    /// Removes all entries from the index.
    ///
    /// Call [`save()`](Bindle::save) to commit. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called.
    pub fn clear(&mut self) {
        // In-memory only; the on-disk index/data are untouched until save/vacuum.
        self.index.clear()
    }
405
406    /// Returns true if an entry with the given name exists.
407    pub fn exists(&self, name: &str) -> bool {
408        self.index.contains_key(name)
409    }
410
411    /// Removes an entry from the index.
412    ///
413    /// Returns true if the entry existed. Data remains in the file until [`vacuum()`](Bindle::vacuum) is called.
414    pub fn remove(&mut self, name: &str) -> bool {
415        self.index.remove(name).is_some()
416    }
417
418    /// Recursively adds all files from a directory to the archive.
419    ///
420    /// File paths are stored relative to the source directory. Call [`save()`](Bindle::save) to commit.
421    pub fn pack<P: AsRef<Path>>(&mut self, src_dir: P, compress: Compress) -> io::Result<()> {
422        self.pack_recursive(src_dir.as_ref(), src_dir.as_ref(), compress)
423    }
424
425    fn pack_recursive(
426        &mut self,
427        base: &Path,
428        current: &Path,
429        compress: Compress,
430    ) -> io::Result<()> {
431        if current.is_dir() {
432            for entry in std::fs::read_dir(current)? {
433                self.pack_recursive(base, &entry?.path(), compress)?;
434            }
435        } else {
436            let name = current
437                .strip_prefix(base)
438                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
439                .to_str()
440                .unwrap_or_default();
441            self.add_file(&name, current, compress)?;
442        }
443        Ok(())
444    }
445
446    /// Extracts all entries to a destination directory.
447    ///
448    /// Creates subdirectories as needed to match the stored paths.
449    pub fn unpack<P: AsRef<Path>>(&self, dest: P) -> io::Result<()> {
450        let dest_path = dest.as_ref();
451        std::fs::create_dir_all(dest_path)?;
452
453        // Collect all unique parent directories
454        let mut dirs = std::collections::HashSet::new();
455        for (name, _) in &self.index {
456            if let Some(parent) = Path::new(name).parent() {
457                // Only add non-empty parent paths
458                if parent != Path::new("") {
459                    dirs.insert(dest_path.join(parent));
460                }
461            }
462        }
463
464        // Create all directories upfront (sorted for parent-first order)
465        if !dirs.is_empty() {
466            let mut dirs: Vec<_> = dirs.into_iter().collect();
467            dirs.sort();
468            for dir in dirs {
469                std::fs::create_dir_all(&dir)?;
470            }
471        }
472
473        // Sort entries by physical offset for sequential reads (better cache locality)
474        let mut entries: Vec<_> = self.index.iter().collect();
475        entries.sort_by_key(|(_, entry)| entry.offset());
476
477        // Extract files without per-file directory checks
478        for (name, _) in entries {
479            let file_path = dest_path.join(name);
480            let mut reader = self.reader(name)?;
481            let mut file = File::create(&file_path)?;
482            io::copy(&mut reader, &mut file)?;
483            reader.verify_crc32()?;
484        }
485        Ok(())
486    }
487
    /// Creates a streaming writer for adding an entry.
    ///
    /// The writer must be closed and then [`save()`](Bindle::save) must be called to commit the entry.
    ///
    /// NOTE(review): `should_auto_compress` is called with a length of 0 here
    /// because the stream length is unknown up front, so `Compress::Auto`
    /// never enables compression on this path — confirm this is intended.
    pub fn writer<'a>(&'a mut self, name: &str, compress: Compress) -> io::Result<Writer<'a>> {
        self.file.lock_exclusive()?;
        // Only seek if not already at the correct position
        let current_pos = self.file.stream_position()?;
        if current_pos != self.data_end {
            self.file.seek(SeekFrom::Start(self.data_end))?;
        }
        let compress = self.should_auto_compress(compress, 0);
        let start_offset = self.data_end;
        let encoder = if compress {
            // The encoder writes through a cloned handle (same underlying
            // file descriptor/offset) at zstd compression level 3.
            let f = self.file.try_clone()?;
            Some(zstd::Encoder::new(f, 3)?)
        } else {
            None
        };
        Ok(Writer {
            name: name.to_string(),
            bindle: self,
            encoder,
            start_offset,
            uncompressed_size: 0,
            crc32_hasher: Hasher::new(),
        })
    }
515}
516
impl Drop for Bindle {
    /// Releases the advisory file lock on drop. The error is ignored
    /// because `drop` has no way to propagate it.
    fn drop(&mut self) {
        let _ = self.file.unlock();
    }
}