Skip to main content

zipatch_rs/apply/
vfs.rs

1//! Filesystem abstraction for the apply layer.
2//!
3//! The apply layer never touches `std::fs` directly. All I/O is routed
4//! through a [`Vfs`] implementation owned by [`ApplySession`](super::ApplySession).
5//! Two concrete impls ship with the crate:
6//!
7//! - [`StdFs`] — wraps `std::fs`. The default.
8//! - [`InMemoryFs`] — pure in-memory backing. Useful for tests, dry-run
9//!   previews, sandbox/embedded environments, and Tauri-style "preview
10//!   what this patch would do" flows.
11//!
12//! The trait surface is intentionally minimal: every method corresponds
13//! directly to a call site in the apply or index-apply layer. New
14//! operations should only be added when an apply path needs one.
15//!
16//! # Handle erasure
17//!
18//! [`Vfs::open_write`] and [`Vfs::open_read`] return boxed trait objects.
19//! The apply hot path is dominated by DEFLATE decompression and syscalls;
20//! the virtual-call overhead of a `Box<dyn Write + Seek + Send>` is in
21//! the noise. Keeping the handles type-erased keeps [`ApplySession`](super::ApplySession)
22//! non-generic and the public API clean — `Chunk::apply(&mut ApplySession)`
23//! stays as-is instead of becoming `Chunk::apply<F: Vfs>(&mut ApplySession<F>)`.
24
25use std::io::{self, Read, Seek, Write};
26use std::path::{Path, PathBuf};
27use std::sync::{Arc, Mutex};
28
/// Point-in-time metadata for a single path.
///
/// Deliberately tiny: it carries only the facts the apply layer consults.
#[derive(Debug, Clone, Copy)]
pub struct VfsMetadata {
    /// Size of the entry, in bytes.
    pub len: u64,
    /// Whether the entry is a regular file.
    pub is_file: bool,
}
37
/// Handle type produced by [`Vfs::open_write`]: writable and seekable.
///
/// On top of `Write + Seek`, the apply layer needs exactly two more
/// per-handle operations, both declared here: resizing the file and
/// forcing it to durable storage.
pub trait VfsWrite: Write + Seek + Send {
    /// Resize the file behind this handle so it is exactly `len` bytes long.
    fn set_len(&mut self, len: u64) -> io::Result<()>;

    /// Push every pending write — including anything still buffered
    /// inside the handle — through to durable storage. For the std
    /// backend this is `std::fs::File::sync_all`.
    fn sync_all(&mut self) -> io::Result<()>;
}
51
/// Handle type produced by [`Vfs::open_read`]: readable and seekable.
///
/// A marker trait with no methods of its own.
pub trait VfsRead: Read + Seek + Send {}

// Blanket impl: anything that is `Read + Seek + Send` is a `VfsRead`.
impl<T> VfsRead for T where T: Read + Seek + Send + ?Sized {}
56
57/// Filesystem abstraction backing [`ApplySession`](super::ApplySession).
58///
59/// Implementations must be safe to share across threads (`Send + Sync`)
60/// so a configured [`ApplyConfig`](super::ApplyConfig) can be moved to a
61/// worker thread for the actual apply.
62///
63/// # Async usage
64///
65/// `Vfs` and its handle traits ([`VfsRead`], [`VfsWrite`]) are
66/// intentionally synchronous — the apply hot path is dominated by
67/// DEFLATE decompression and filesystem syscalls, and modelling the
68/// trait as `async` would force every consumer to pull in a runtime.
69/// See the crate-level "Async usage" section for the full rationale.
70///
71/// Async-backed filesystems (e.g. a tokio-based virtual fs or an
72/// `object_store`-style remote backend) implement the sync trait on a
73/// type that owns a [`tokio::runtime::Handle`-equivalent] and uses
74/// `Handle::block_on` inside each method, or — for the common
75/// "everything runs on a worker thread anyway" pattern — bridges
76/// through an [`mpsc`](std::sync::mpsc) request/response pair against
77/// a separate async task. Because the apply driver itself is expected
78/// to run inside `tokio::task::spawn_blocking`, neither bridge pattern
79/// stalls the reactor.
80///
81/// [`tokio::runtime::Handle`-equivalent]: https://docs.rs/tokio/latest/tokio/runtime/struct.Handle.html
82pub trait Vfs: Send + Sync {
83    /// Open `path` for writing, creating it if it does not exist.
84    ///
85    /// The handle is opened with `write=true, create=true, truncate=false`
86    /// — the apply layer manages truncation explicitly via
87    /// [`VfsWrite::set_len`] when a chunk demands it.
88    fn open_write(&self, path: &Path) -> io::Result<Box<dyn VfsWrite>>;
89
90    /// Open `path` for reading.
91    fn open_read(&self, path: &Path) -> io::Result<Box<dyn VfsRead>>;
92
93    /// Create `path` and every missing ancestor directory.
94    fn create_dir_all(&self, path: &Path) -> io::Result<()>;
95
96    /// Remove the file at `path`.
97    fn remove_file(&self, path: &Path) -> io::Result<()>;
98
99    /// Remove the (empty) directory at `path`.
100    fn remove_dir(&self, path: &Path) -> io::Result<()>;
101
102    /// Return entries directly inside `path`. Order is implementation-defined.
103    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>>;
104
105    /// Return `true` if any entry exists at `path`.
106    fn exists(&self, path: &Path) -> bool;
107
108    /// Return metadata for `path`. Returns `NotFound` if the path does not exist.
109    fn metadata(&self, path: &Path) -> io::Result<VfsMetadata>;
110}
111
112// ---------------------------------------------------------------------------
113// StdFs
114// ---------------------------------------------------------------------------
115
/// The default [`Vfs`]: a thin layer over `std::fs`.
///
/// Holds no state, so copying or cloning it is free.
#[derive(Debug, Default, Clone, Copy)]
pub struct StdFs;

impl StdFs {
    /// Build a new `StdFs`.
    #[must_use]
    pub const fn new() -> Self {
        StdFs
    }
}
127
128impl VfsWrite for std::fs::File {
129    fn set_len(&mut self, len: u64) -> io::Result<()> {
130        std::fs::File::set_len(self, len)
131    }
132    fn sync_all(&mut self) -> io::Result<()> {
133        std::fs::File::sync_all(self)
134    }
135}
136
137impl Vfs for StdFs {
138    fn open_write(&self, path: &Path) -> io::Result<Box<dyn VfsWrite>> {
139        let file = std::fs::OpenOptions::new()
140            .write(true)
141            .create(true)
142            .truncate(false)
143            .open(path)?;
144        Ok(Box::new(file))
145    }
146
147    fn open_read(&self, path: &Path) -> io::Result<Box<dyn VfsRead>> {
148        Ok(Box::new(std::fs::File::open(path)?))
149    }
150
151    fn create_dir_all(&self, path: &Path) -> io::Result<()> {
152        std::fs::create_dir_all(path)
153    }
154
155    fn remove_file(&self, path: &Path) -> io::Result<()> {
156        std::fs::remove_file(path)
157    }
158
159    fn remove_dir(&self, path: &Path) -> io::Result<()> {
160        std::fs::remove_dir(path)
161    }
162
163    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
164        let mut out = Vec::new();
165        for entry in std::fs::read_dir(path)? {
166            out.push(entry?.path());
167        }
168        Ok(out)
169    }
170
171    fn exists(&self, path: &Path) -> bool {
172        path.exists()
173    }
174
175    fn metadata(&self, path: &Path) -> io::Result<VfsMetadata> {
176        let m = std::fs::metadata(path)?;
177        Ok(VfsMetadata {
178            len: m.len(),
179            is_file: m.is_file(),
180        })
181    }
182}
183
184// ---------------------------------------------------------------------------
185// InMemoryFs
186// ---------------------------------------------------------------------------
187
/// A single file stored inside an [`InMemoryFs`]: just its raw contents.
#[derive(Debug, Default, Clone)]
struct MemFile {
    /// The file's complete contents.
    bytes: Vec<u8>,
}
193
194/// Shared storage shape for [`InMemoryFs`]. Lives behind an `Arc<Mutex<_>>`
195/// so handles can write back into the same map their parent vfs handed out.
196#[derive(Debug, Default)]
197struct MemFsInner {
198    files: std::collections::BTreeMap<PathBuf, MemFile>,
199    dirs: std::collections::BTreeSet<PathBuf>,
200}
201
202/// In-memory [`Vfs`] backing. Useful for tests, dry-run previews, and
203/// sandboxed environments where the apply layer must not touch the host
204/// filesystem.
205///
206/// Cheap to clone — all clones share the same underlying storage, so a
207/// caller can hand a clone to [`ApplyConfig::with_vfs`](super::ApplyConfig::with_vfs)
208/// and then inspect the in-memory state after the apply returns via the
209/// retained clone.
210///
211/// # Storage shape
212///
213/// - Files: `BTreeMap<PathBuf, Vec<u8>>`, keyed by the absolute path the
214///   apply layer resolved them to.
215/// - Directories: `BTreeSet<PathBuf>` of created directories.
216/// - Both maps live behind a single `Arc<Mutex<_>>`. Contention is not a
217///   concern — the apply layer is single-threaded and holds the lock only
218///   for the duration of one syscall-shaped operation.
219#[derive(Debug, Default, Clone)]
220pub struct InMemoryFs {
221    inner: Arc<Mutex<MemFsInner>>,
222}
223
224impl InMemoryFs {
225    /// Construct an empty in-memory filesystem.
226    #[must_use]
227    pub fn new() -> Self {
228        Self::default()
229    }
230
231    /// Take a snapshot of every file's contents.
232    ///
233    /// The returned map is keyed by the absolute path the apply layer
234    /// wrote to. Use this after a successful apply to inspect the
235    /// resulting layout in tests.
236    ///
237    /// # Panics
238    ///
239    /// Panics if the internal mutex is poisoned.
240    #[must_use]
241    pub fn snapshot_files(&self) -> std::collections::BTreeMap<PathBuf, Vec<u8>> {
242        let inner = self.inner.lock().unwrap();
243        inner
244            .files
245            .iter()
246            .map(|(k, v)| (k.clone(), v.bytes.clone()))
247            .collect()
248    }
249
250    /// Take a snapshot of every created directory path.
251    ///
252    /// # Panics
253    ///
254    /// Panics if the internal mutex is poisoned.
255    #[must_use]
256    pub fn snapshot_dirs(&self) -> std::collections::BTreeSet<PathBuf> {
257        self.inner.lock().unwrap().dirs.clone()
258    }
259
260    /// Read the bytes of a single file. Returns `None` if absent.
261    ///
262    /// # Panics
263    ///
264    /// Panics if the internal mutex is poisoned.
265    #[must_use]
266    pub fn read_file(&self, path: &Path) -> Option<Vec<u8>> {
267        self.inner
268            .lock()
269            .unwrap()
270            .files
271            .get(path)
272            .map(|f| f.bytes.clone())
273    }
274}
275
276/// Writable handle into an [`InMemoryFs`]. Operates on a per-handle scratch
277/// buffer and commits the result back into shared storage on `flush` /
278/// `sync_all` / `set_len` / drop.
279struct MemFileHandle {
280    inner: Arc<Mutex<MemFsInner>>,
281    path: PathBuf,
282    cursor: u64,
283    // Scratch copy of the file's bytes; the canonical store is rewritten
284    // from this on every write-through point. Writes happen against this
285    // local buffer first so a partial write inside one syscall does not
286    // require the global lock around every byte.
287    bytes: Vec<u8>,
288}
289
290impl MemFileHandle {
291    fn commit(&self) {
292        let mut inner = self.inner.lock().unwrap();
293        inner
294            .files
295            .entry(self.path.clone())
296            .or_default()
297            .bytes
298            .clone_from(&self.bytes);
299    }
300}
301
302impl Drop for MemFileHandle {
303    fn drop(&mut self) {
304        self.commit();
305    }
306}
307
308impl Write for MemFileHandle {
309    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
310        let pos = usize::try_from(self.cursor)
311            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "cursor overflow"))?;
312        let end = pos
313            .checked_add(buf.len())
314            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "write past usize"))?;
315        if end > self.bytes.len() {
316            self.bytes.resize(end, 0);
317        }
318        self.bytes[pos..end].copy_from_slice(buf);
319        self.cursor = end as u64;
320        Ok(buf.len())
321    }
322
323    fn flush(&mut self) -> io::Result<()> {
324        self.commit();
325        Ok(())
326    }
327}
328
329impl Seek for MemFileHandle {
330    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
331        let new_pos: i128 = match pos {
332            io::SeekFrom::Start(n) => i128::from(n),
333            io::SeekFrom::Current(d) => i128::from(self.cursor) + i128::from(d),
334            io::SeekFrom::End(d) => i128::from(self.bytes.len() as u64) + i128::from(d),
335        };
336        if new_pos < 0 {
337            return Err(io::Error::new(
338                io::ErrorKind::InvalidInput,
339                "negative seek position",
340            ));
341        }
342        self.cursor = new_pos as u64;
343        Ok(self.cursor)
344    }
345}
346
347impl VfsWrite for MemFileHandle {
348    fn set_len(&mut self, len: u64) -> io::Result<()> {
349        let new_len = usize::try_from(len)
350            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "len overflow"))?;
351        self.bytes.resize(new_len, 0);
352        if self.cursor > len {
353            self.cursor = len;
354        }
355        self.commit();
356        Ok(())
357    }
358
359    fn sync_all(&mut self) -> io::Result<()> {
360        self.commit();
361        Ok(())
362    }
363}
364
/// Read-only handle over a private copy of a file's bytes.
///
/// Because it owns its bytes, changes made to the store after the
/// handle was built are not visible through it.
struct MemReadHandle {
    /// The bytes this handle reads from.
    bytes: Vec<u8>,
    /// Current read offset, in bytes from the start.
    cursor: u64,
}

impl Read for MemReadHandle {
    /// Copy as many bytes as fit from the cursor into `buf` and advance
    /// the cursor. Returns `Ok(0)` when the cursor is at or past the end.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // A cursor too large for `usize` is certainly past the end;
        // saturating keeps the comparison below correct.
        let pos = usize::try_from(self.cursor).unwrap_or(usize::MAX);
        if pos >= self.bytes.len() {
            return Ok(0);
        }
        let available = &self.bytes[pos..];
        let n = available.len().min(buf.len());
        buf[..n].copy_from_slice(&available[..n]);
        self.cursor += n as u64;
        Ok(n)
    }
}

impl Seek for MemReadHandle {
    /// Move the cursor and return its new absolute position.
    ///
    /// Seeking past the end is allowed; reads from there return 0. The
    /// cursor is left unchanged when an error is returned.
    ///
    /// # Errors
    ///
    /// `InvalidInput` if the target position is negative or does not
    /// fit in `u64`.
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        // i128 holds any `u64 + i64` sum, so the arithmetic cannot wrap.
        let target: i128 = match pos {
            io::SeekFrom::Start(n) => i128::from(n),
            io::SeekFrom::Current(d) => i128::from(self.cursor) + i128::from(d),
            io::SeekFrom::End(d) => i128::from(self.bytes.len() as u64) + i128::from(d),
        };
        if target < 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "negative seek position",
            ));
        }
        // A plain `as u64` cast would silently wrap positions above
        // `u64::MAX` back towards zero; reject them instead.
        let target = u64::try_from(target).map_err(|_| {
            io::Error::new(io::ErrorKind::InvalidInput, "seek position overflows u64")
        })?;
        self.cursor = target;
        Ok(target)
    }
}
401
402impl Vfs for InMemoryFs {
403    fn open_write(&self, path: &Path) -> io::Result<Box<dyn VfsWrite>> {
404        let inner = self.inner.lock().unwrap();
405        let bytes = inner
406            .files
407            .get(path)
408            .map(|f| f.bytes.clone())
409            .unwrap_or_default();
410        drop(inner);
411        Ok(Box::new(MemFileHandle {
412            inner: Arc::clone(&self.inner),
413            path: path.to_path_buf(),
414            cursor: 0,
415            bytes,
416        }))
417    }
418
419    fn open_read(&self, path: &Path) -> io::Result<Box<dyn VfsRead>> {
420        let inner = self.inner.lock().unwrap();
421        let Some(f) = inner.files.get(path) else {
422            return Err(io::Error::from(io::ErrorKind::NotFound));
423        };
424        let bytes = f.bytes.clone();
425        Ok(Box::new(MemReadHandle { bytes, cursor: 0 }))
426    }
427
428    fn create_dir_all(&self, path: &Path) -> io::Result<()> {
429        let mut inner = self.inner.lock().unwrap();
430        let mut cur = PathBuf::new();
431        for comp in path.components() {
432            cur.push(comp);
433            inner.dirs.insert(cur.clone());
434        }
435        Ok(())
436    }
437
438    fn remove_file(&self, path: &Path) -> io::Result<()> {
439        let mut inner = self.inner.lock().unwrap();
440        if inner.files.remove(path).is_none() {
441            return Err(io::Error::from(io::ErrorKind::NotFound));
442        }
443        Ok(())
444    }
445
446    fn remove_dir(&self, path: &Path) -> io::Result<()> {
447        let mut inner = self.inner.lock().unwrap();
448        if !inner.dirs.remove(path) {
449            return Err(io::Error::from(io::ErrorKind::NotFound));
450        }
451        Ok(())
452    }
453
454    fn read_dir(&self, path: &Path) -> io::Result<Vec<PathBuf>> {
455        let inner = self.inner.lock().unwrap();
456        if !inner.dirs.contains(path) && !inner.files.keys().any(|p| p.parent() == Some(path)) {
457            return Err(io::Error::from(io::ErrorKind::NotFound));
458        }
459        let mut out = Vec::new();
460        for p in inner.files.keys() {
461            if p.parent() == Some(path) {
462                out.push(p.clone());
463            }
464        }
465        for d in &inner.dirs {
466            if d.parent() == Some(path) {
467                out.push(d.clone());
468            }
469        }
470        Ok(out)
471    }
472
473    fn exists(&self, path: &Path) -> bool {
474        let inner = self.inner.lock().unwrap();
475        inner.files.contains_key(path) || inner.dirs.contains(path)
476    }
477
478    fn metadata(&self, path: &Path) -> io::Result<VfsMetadata> {
479        let inner = self.inner.lock().unwrap();
480        if let Some(f) = inner.files.get(path) {
481            Ok(VfsMetadata {
482                len: f.bytes.len() as u64,
483                is_file: true,
484            })
485        } else if inner.dirs.contains(path) {
486            Ok(VfsMetadata {
487                len: 0,
488                is_file: false,
489            })
490        } else {
491            Err(io::Error::from(io::ErrorKind::NotFound))
492        }
493    }
494}
495
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::SeekFrom;

    /// Bytes written and flushed through a handle are visible via `read_file`.
    #[test]
    fn memfs_write_then_read_round_trips() {
        let memfs = InMemoryFs::new();
        let target = Path::new("/a/b.txt");
        let mut handle = memfs.open_write(target).unwrap();
        handle.write_all(b"hello").unwrap();
        handle.flush().unwrap();
        drop(handle);
        assert_eq!(memfs.read_file(target).unwrap(), b"hello");
    }

    /// Writing after seeking past the end zero-fills the gap.
    #[test]
    fn memfs_seek_write_extends_with_zeros() {
        let memfs = InMemoryFs::new();
        let target = Path::new("/x");
        let mut handle = memfs.open_write(target).unwrap();
        handle.seek(SeekFrom::Start(8)).unwrap();
        handle.write_all(b"AB").unwrap();
        handle.flush().unwrap();

        let contents = memfs.read_file(target).unwrap();
        assert_eq!(contents.len(), 10);
        let (gap, tail) = contents.split_at(8);
        assert_eq!(gap, &[0u8; 8]);
        assert_eq!(tail, b"AB");
    }

    /// `set_len` with a smaller length drops the tail of the file.
    #[test]
    fn memfs_set_len_truncates() {
        let memfs = InMemoryFs::new();
        let target = Path::new("/t");
        {
            let mut handle = memfs.open_write(target).unwrap();
            handle.write_all(b"abcdef").unwrap();
            handle.set_len(3).unwrap();
        }
        assert_eq!(memfs.read_file(target).unwrap(), b"abc");
    }

    /// A file that was opened for writing and dropped exists until removed.
    #[test]
    fn memfs_remove_file_clears_entry() {
        let memfs = InMemoryFs::new();
        let target = Path::new("/r");
        drop(memfs.open_write(target).unwrap());
        assert!(memfs.exists(target));
        memfs.remove_file(target).unwrap();
        assert!(!memfs.exists(target));
    }

    /// `create_dir_all` records every ancestor, not only the leaf.
    #[test]
    fn memfs_create_dir_all_records_each_ancestor() {
        let memfs = InMemoryFs::new();
        memfs.create_dir_all(Path::new("/a/b/c")).unwrap();
        let recorded = memfs.snapshot_dirs();
        for expected in ["/a", "/a/b", "/a/b/c"] {
            assert!(recorded.contains(Path::new(expected)));
        }
    }

    /// `read_dir` returns the files created directly inside a directory.
    #[test]
    fn memfs_read_dir_enumerates_children() {
        let memfs = InMemoryFs::new();
        memfs.create_dir_all(Path::new("/p")).unwrap();
        for child in ["/p/a", "/p/b"] {
            drop(memfs.open_write(Path::new(child)).unwrap());
        }
        let listing = memfs.read_dir(Path::new("/p")).unwrap();
        assert_eq!(listing.len(), 2);
    }

    /// `StdFs` writes and reads back real bytes inside a temporary directory.
    #[test]
    fn stdfs_round_trip_against_tempdir() {
        let scratch = tempfile::tempdir().unwrap();
        let stdfs = StdFs::new();
        let target = scratch.path().join("hello.txt");

        let mut writer = stdfs.open_write(&target).unwrap();
        writer.write_all(b"world").unwrap();
        writer.flush().unwrap();
        drop(writer);

        let mut reader = stdfs.open_read(&target).unwrap();
        let mut contents = Vec::new();
        reader.read_to_end(&mut contents).unwrap();
        assert_eq!(contents, b"world");
    }
}