Skip to main content

mount/
shell.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Platform-agnostic shell trait.
3//!
4//! [`PlatformShell`] is the seam where a thin per-platform adapter
5//! (FUSE on Linux, FSKit on macOS, ProjFS / CfAPI on Windows) plugs
6//! into the content-addressed core. The core implements this trait
7//! once, and each platform binding wraps it.
8//!
//! Conceptually the trait's required surface is six operations:
//! lookup, read, write, enumerate, attrs, invalidate. They mirror
//! what every kernel-side filesystem hook ultimately needs to ask,
//! so they can be implemented for an in-memory test mount, a
//! Git-backed mount, a Heddle-state-backed mount, etc. The remaining
//! methods (write lifecycle and namespace mutation) ship with
//! default bodies, so a minimal shell implements only those six.
14
15use std::{
16    ffi::{OsStr, OsString},
17    path::Path,
18    time::SystemTime,
19};
20
21use objects::object::FileMode;
22
23use crate::error::{MountError, Result};
24
/// Opaque handle naming one filesystem node for the duration of a
/// single mount session.
///
/// The value `1` is reserved for the root, following FUSE
/// convention. Every other id is handed out by the core: it stays
/// stable while the mount lives, but [`PlatformShell::invalidate`]
/// may retire it once the underlying state moves.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(pub u64);

impl NodeId {
    /// Id of the mount root; FUSE numbering always begins here.
    pub const ROOT: Self = Self(1);
}
38
/// Structural category of a filesystem entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    /// A directory (container of further entries).
    Directory,
    /// A regular file.
    File,
    /// A symbolic link.
    Symlink,
}
46
47/// A single directory entry, returned from [`PlatformShell::lookup`]
48/// and [`PlatformShell::enumerate`].
49#[derive(Clone, Debug)]
50pub struct Entry {
51    pub node: NodeId,
52    pub name: OsString,
53    pub kind: NodeKind,
54    pub size: u64,
55    /// Unix mode bits, including type. Cached so the platform shell
56    /// can answer `attrs` without a second walk.
57    pub unix_mode: u32,
58}
59
60/// Stat-style attributes for a single node.
61#[derive(Clone, Copy, Debug)]
62pub struct Attrs {
63    pub node: NodeId,
64    pub kind: NodeKind,
65    pub size: u64,
66    pub unix_mode: u32,
67    pub nlink: u32,
68    /// Modification / change times. The mount has no per-blob clock,
69    /// so we report a single fixed timestamp captured when the mount
70    /// was created. This keeps `ls -l` from showing nonsense and
71    /// makes diffs against a stable reference deterministic.
72    pub mtime: SystemTime,
73}
74
75/// Platform-agnostic operations every adapter implements against
76/// a shared core. Names mirror the eventual FUSE callbacks (and the
77/// equivalent FSKit / ProjFS hooks) so the platform layer can be
78/// almost trivial.
79///
80/// ## Write lifecycle
81///
82/// Mount writes flow through three calls:
83///
84/// 1. [`write`](PlatformShell::write) — kernel issues a sequence of
85///    `write(offset, bytes)` calls against an open file. The core
86///    accumulates these in an in-memory hot-tier buffer keyed by
87///    `NodeId`.
88/// 2. [`flush`](PlatformShell::flush) — kernel signals the buffer
89///    can be made durable (mapped to FUSE's `flush` callback, which
90///    fires on `close(2)` and on explicit fsync). The core promotes
91///    the hot buffer to a CAS blob and records `path -> blob_oid` in
92///    the per-thread pending tree. Buffer is dropped.
93/// 3. [`release`](PlatformShell::release) — kernel signals the file
94///    is closed and the inode handle can be retired. The default
95///    contract: identical to flush. FUSE doesn't always issue
96///    `flush` cleanly on every close path, so adapters should call
97///    `release` here too as a belt-and-braces measure.
98///
99/// Implementations MAY also promote a hot buffer opportunistically
100/// (e.g. after an idle window) — this is a safety net for files that
101/// the kernel never explicitly closes.
102///
103/// ## Platform notes
104///
105/// The three-call write lifecycle above describes the Linux/FUSE
106/// path verbatim — `fuser` delivers each `write(2)` syscall as a
107/// `write` callback, then `close(2)` triggers `flush` and `release`.
108/// FSKit on macOS exposes the same per-write granularity.
109///
110/// On Windows, ProjFS does not intercept individual writes: after a
111/// virtualized file is "hydrated" by the first read, subsequent
112/// writes go straight to NTFS and ProjFS only notifies the provider
113/// after the handle closes. The ProjFS adapter bridges this by
114/// reading the now-fully-hydrated file at close time and synthesizing
115/// a single `write(node, 0, full_contents)` + `flush(node)` against
116/// this trait. The hot-tier per-write buffer is therefore a
117/// Linux/FUSE (and FSKit) optimization — implementations of this
118/// trait can rely on the buffer being non-empty only on platforms
119/// that deliver per-write callbacks.
120pub trait PlatformShell {
121    /// Look up `name` inside `parent`. Returns `None` for ENOENT.
122    fn lookup(&self, parent: NodeId, name: &OsStr) -> Result<Option<Entry>>;
123
124    /// Read up to `buf.len()` bytes from `node`, starting at `offset`.
125    /// Returns the number of bytes actually written into `buf`.
126    fn read(&self, node: NodeId, offset: u64, buf: &mut [u8]) -> Result<usize>;
127
128    /// Write `data` to `node` at `offset`. Returns bytes written.
129    fn write(&self, node: NodeId, offset: u64, data: &[u8]) -> Result<usize>;
130
131    /// List the children of `dir`.
132    fn enumerate(&self, dir: NodeId) -> Result<Vec<Entry>>;
133
134    /// Stat `node`.
135    fn attrs(&self, node: NodeId) -> Result<Attrs>;
136
137    /// Drop any cached identity for `node`. The platform layer calls
138    /// this when the underlying state moves and previously-handed-out
139    /// inode numbers may now point at the wrong content.
140    fn invalidate(&self, node: NodeId) -> Result<()>;
141
142    /// Promote any hot-tier buffer for `node` into a CAS blob. The
143    /// FUSE `flush` callback dispatches here (fires on `close(2)`
144    /// and explicit fsync). Default: no-op for read-only mounts.
145    ///
146    /// Lifecycle note: FUSE `flush` fires on *every* descriptor close
147    /// — including the close of a `dup`-derived fd — so it can be
148    /// invoked multiple times before the last open handle is gone.
149    /// Implementations that maintain per-inode "is the directory
150    /// entry still gone?" state (orphan tracking) MUST defer the
151    /// final clear to [`Self::release`]; touching it here would let a
152    /// surviving fd's next write republish the unlinked pathname.
153    fn flush(&self, _node: NodeId) -> Result<()> {
154        Ok(())
155    }
156
157    /// Final close of `node`. The FUSE `release` callback dispatches
158    /// here; it fires once per `open(2)` after the last fd derived
159    /// from that open is closed. This is the canonical "last close of
160    /// the inode" signal — it is the right hook (NOT [`Self::flush`])
161    /// for retiring per-inode lifecycle state like orphan-tracking
162    /// markers or open-handle refcounts. Default: identical to flush
163    /// so shells that do not maintain per-inode lifecycle state
164    /// inherit a uniform contract.
165    fn release(&self, node: NodeId) -> Result<()> {
166        self.flush(node)
167    }
168
169    /// Notify the shell that a new open file handle for `node` has
170    /// been minted. FUSE adapters call this on the `open` / `create`
171    /// callbacks so the shell can maintain a per-inode open-handle
172    /// refcount — used to time the [`Self::release`] cleanup against
173    /// the *final* close instead of the first one. Default: no-op so
174    /// shells without lifecycle state are unaffected.
175    fn on_open(&self, _node: NodeId) -> Result<()> {
176        Ok(())
177    }
178
179    /// Create a fresh regular file under `parent`. Mints a [`NodeId`]
180    /// for the new file in the writable overlay and returns its
181    /// [`Entry`]; subsequent [`write`](PlatformShell::write) calls
182    /// land in the per-thread hot tier.
183    ///
184    /// When `exclusive` is true (`O_CREAT|O_EXCL`), the call must
185    /// fail with [`MountError::AlreadyExists`] if `name` already
186    /// resolves under `parent` (either in the captured tree or the
187    /// pending tier). When `exclusive` is false, a hit on an
188    /// existing entry is returned as-is (same shape as `lookup`).
189    ///
190    /// Default: [`MountError::ReadOnly`] — implementations that
191    /// don't support mutation inherit a uniform errno.
192    fn create_file(
193        &self,
194        _parent: NodeId,
195        _name: &OsStr,
196        _mode: FileMode,
197        _exclusive: bool,
198    ) -> Result<Entry> {
199        Err(MountError::ReadOnly)
200    }
201
202    /// Create an empty directory under `parent` in the overlay.
203    /// Returns the new directory's [`Entry`]. Fails with
204    /// [`MountError::AlreadyExists`] when `name` already resolves.
205    fn make_dir(&self, _parent: NodeId, _name: &OsStr) -> Result<Entry> {
206        Err(MountError::ReadOnly)
207    }
208
209    /// Delete the file named `name` under `parent`. The captured-tree
210    /// entry (if any) is tombstoned so [`lookup`](Self::lookup) /
211    /// [`enumerate`](Self::enumerate) skip it; any pending-tier hot
212    /// buffer or warm blob for the path is dropped.
213    ///
214    /// Fails with [`MountError::NotFound`] if `name` doesn't resolve,
215    /// or [`MountError::IsADirectory`] if it resolves to a directory.
216    fn unlink_entry(&self, _parent: NodeId, _name: &OsStr) -> Result<()> {
217        Err(MountError::ReadOnly)
218    }
219
220    /// Remove the empty directory named `name` under `parent`. Fails
221    /// with [`MountError::NotADirectory`] for a file, with
222    /// [`MountError::NotEmpty`] when the directory still has visible
223    /// children (across captured tree + pending tier), or
224    /// [`MountError::NotFound`] when nothing resolves.
225    fn rmdir_entry(&self, _parent: NodeId, _name: &OsStr) -> Result<()> {
226        Err(MountError::ReadOnly)
227    }
228
229    /// Atomically rename `(old_parent, old_name)` to
230    /// `(new_parent, new_name)`. Handles both same-directory and
231    /// cross-directory cases. Replacing an existing entry of the
232    /// same kind is allowed (POSIX semantics); replacing a directory
233    /// with a file (or vice-versa) fails with
234    /// [`MountError::IsADirectory`] / [`MountError::NotADirectory`].
235    fn rename_entry(
236        &self,
237        _old_parent: NodeId,
238        _old_name: &OsStr,
239        _new_parent: NodeId,
240        _new_name: &OsStr,
241    ) -> Result<()> {
242        Err(MountError::ReadOnly)
243    }
244
245    /// Same as [`Self::rename_entry`] but honours [`RenameOptions`] —
246    /// in particular `no_replace`, which atomically refuses the rename
247    /// when the destination already resolves. The check + the
248    /// directory-entry mutation MUST happen under a single critical
249    /// section to avoid a TOCTOU window between the existence check
250    /// and the rename itself. Default: ignore options and dispatch to
251    /// `rename_entry` (preserving the existing trait surface for
252    /// shells that do not yet support flags).
253    fn rename_entry_with_options(
254        &self,
255        old_parent: NodeId,
256        old_name: &OsStr,
257        new_parent: NodeId,
258        new_name: &OsStr,
259        _options: RenameOptions,
260    ) -> Result<()> {
261        self.rename_entry(old_parent, old_name, new_parent, new_name)
262    }
263
264    /// Apply attribute updates to `node`. Returns the post-update
265    /// [`Attrs`] so callers can reply without a second `getattr`
266    /// round trip. See [`AttrUpdate`] for which fields the overlay
267    /// actually persists; unsupported fields are no-ops.
268    fn set_attrs(&self, _node: NodeId, _update: AttrUpdate) -> Result<Attrs> {
269        Err(MountError::ReadOnly)
270    }
271
272    /// Create a symbolic link named `name` under `parent` whose
273    /// target is the byte-equivalent of `target`. Returns the new
274    /// link's [`Entry`].
275    fn create_symlink(&self, _parent: NodeId, _name: &OsStr, _target: &Path) -> Result<Entry> {
276        Err(MountError::ReadOnly)
277    }
278
279    /// Read the target of a symbolic link `node`. Returns the raw
280    /// bytes of the link target (which may not be valid UTF-8 on
281    /// some systems, hence [`OsString`]).
282    fn read_link(&self, _node: NodeId) -> Result<OsString> {
283        Err(MountError::ReadOnly)
284    }
285}
286
/// Set of optional attribute changes accepted by
/// [`PlatformShell::set_attrs`]. Each field is an `Option<_>` and
/// `None` means "leave alone" (the kernel passes `None` for every
/// slot the `chmod`/`chown`/`truncate`/`utimensat` call left
/// untouched).
///
/// Heddle's tree model has no slot for arbitrary permission bits:
/// [`FileMode`] offers only [`FileMode::Normal`],
/// [`FileMode::Executable`], [`FileMode::Symlink`] and
/// `FileMode::Gitlink` — see
/// `crates/objects/src/object/tree_types.rs`. A `chmod` that flips
/// the user-executable bit (`0o100`) is mapped to the closest mode;
/// all other bits are lost across `capture`.
#[derive(Debug, Default, Clone, Copy)]
pub struct AttrUpdate {
    /// Replacement unix mode bits (type bits included). When
    /// present, the shell folds the user-executable bit into the
    /// captured [`FileMode`]; the remaining bits do not persist.
    pub mode: Option<u32>,
    /// Replacement uid. Nodes have no individual uid storage (each
    /// one reports the mount owner's uid), so shells may treat this
    /// as a no-op; that keeps `chown` from failing for callers that
    /// do not actually need ownership tracking.
    pub uid: Option<u32>,
    /// Replacement gid, under the same no-op contract as `uid`.
    pub gid: Option<u32>,
    /// Replacement size. When present, the hot-tier buffer is
    /// truncated (or first seeded from the durable predecessor and
    /// then truncated). On the kernel side `O_TRUNC` arrives as
    /// `setattr(size=0)` ahead of the first `write`.
    pub size: Option<u64>,
    /// Replacement mtime, in seconds since the UNIX epoch. The
    /// overlay stores no per-node mtime today, so shells accept the
    /// value as a no-op and the kernel's `utimensat` does not fail.
    pub mtime_sec: Option<i64>,
}
320
321/// Convert a Heddle [`FileMode`] into a node kind.
322pub(crate) fn kind_for_mode(mode: FileMode) -> NodeKind {
323    match mode {
324        FileMode::Normal | FileMode::Executable | FileMode::Gitlink => NodeKind::File,
325        FileMode::Symlink => NodeKind::Symlink,
326    }
327}
328
/// Canonical unix mode for every directory the mount exposes:
/// directory type bits (`0o040000`) combined with `0o755`
/// permissions. Trees carry no mode of their own — one is
/// synthesised at materialization time — so a single shared value
/// lives here.
pub(crate) const DIR_UNIX_MODE: u32 = 0o040_000 | 0o755;
333
/// Flags accepted by [`PlatformShell::rename_entry_with_options`].
/// They cover the subset of Linux `renameat2(2)` flags the mount
/// supports; adapters on other platforms can leave flags that do
/// not apply at their default values.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct RenameOptions {
    /// `RENAME_NOREPLACE`: fail the rename with
    /// [`MountError::AlreadyExists`] if the destination already
    /// resolves. The check has to run inside the same critical
    /// section as the rename itself, so that no concurrent writer
    /// can install the destination between check and mutation.
    pub no_replace: bool,
}
347
#[cfg(test)]
mod tests {
    use std::{cell::Cell, ffi::OsStr, time::UNIX_EPOCH};

    use super::*;

    /// Expands to inert bodies for the six required `PlatformShell`
    /// methods, so every stub below shares one definition of the
    /// mandatory surface and differs only in what it overrides.
    macro_rules! inert_required_methods {
        () => {
            fn lookup(&self, _parent: NodeId, _name: &OsStr) -> Result<Option<Entry>> {
                Ok(None)
            }
            fn read(&self, _node: NodeId, _offset: u64, _buf: &mut [u8]) -> Result<usize> {
                Ok(0)
            }
            fn write(&self, _node: NodeId, _offset: u64, data: &[u8]) -> Result<usize> {
                Ok(data.len())
            }
            fn enumerate(&self, _dir: NodeId) -> Result<Vec<Entry>> {
                Ok(Vec::new())
            }
            fn attrs(&self, node: NodeId) -> Result<Attrs> {
                Ok(Attrs {
                    node,
                    kind: NodeKind::File,
                    size: 0,
                    unix_mode: 0o100644,
                    nlink: 1,
                    mtime: UNIX_EPOCH,
                })
            }
            fn invalidate(&self, _node: NodeId) -> Result<()> {
                Ok(())
            }
        };
    }

    /// `PlatformShell` impl that overrides nothing beyond the
    /// required methods. It pins the trait defaults that
    /// [`StubShell`] has to override for observation purposes
    /// (`flush` and `rename_entry`).
    struct BareShell;

    impl PlatformShell for BareShell {
        inert_required_methods!();
    }

    /// `PlatformShell` impl that counts how often `flush` and
    /// `rename_entry` are invoked, so the delegating defaults
    /// (`release` → `flush`, `rename_entry_with_options` →
    /// `rename_entry`) can be observed. Every other optional hook
    /// stays on its trait default.
    #[derive(Default)]
    struct StubShell {
        flush_calls: Cell<u32>,
        rename_calls: Cell<u32>,
    }

    impl PlatformShell for StubShell {
        inert_required_methods!();

        // Overridden so `release`'s default delegation is observable.
        fn flush(&self, _node: NodeId) -> Result<()> {
            self.flush_calls.set(self.flush_calls.get() + 1);
            Ok(())
        }

        // Overridden so `rename_entry_with_options`'s default
        // delegation is observable.
        fn rename_entry(&self, _op: NodeId, _on: &OsStr, _np: NodeId, _nn: &OsStr) -> Result<()> {
            self.rename_calls.set(self.rename_calls.get() + 1);
            Ok(())
        }
    }

    /// True when `r` is exactly `Err(MountError::ReadOnly)`.
    fn is_read_only<T>(r: Result<T>) -> bool {
        matches!(r, Err(MountError::ReadOnly))
    }

    #[test]
    fn write_side_defaults_return_read_only() {
        let s = StubShell::default();
        let p = NodeId::ROOT;
        let name = OsStr::new("x");

        assert!(is_read_only(s.create_file(p, name, FileMode::Normal, false)));
        assert!(is_read_only(s.make_dir(p, name)));
        assert!(is_read_only(s.unlink_entry(p, name)));
        assert!(is_read_only(s.rmdir_entry(p, name)));
        assert!(is_read_only(s.set_attrs(NodeId(2), AttrUpdate::default())));
        assert!(is_read_only(s.create_symlink(p, name, Path::new("target"))));
        assert!(is_read_only(s.read_link(NodeId(2))));
    }

    #[test]
    fn rename_entry_default_returns_read_only() {
        let s = BareShell;
        let p = NodeId::ROOT;
        let (from, to) = (OsStr::new("a"), OsStr::new("b"));

        assert!(is_read_only(s.rename_entry(p, from, p, to)));
        // The options-aware default forwards to `rename_entry`, so
        // the same error must surface through it.
        assert!(is_read_only(s.rename_entry_with_options(
            p,
            from,
            p,
            to,
            RenameOptions::default(),
        )));
    }

    #[test]
    fn flush_and_release_defaults_are_noops() {
        let s = BareShell;
        assert!(s.flush(NodeId(5)).is_ok());
        assert!(s.release(NodeId(5)).is_ok());
    }

    #[test]
    fn on_open_default_is_noop() {
        let s = StubShell::default();
        assert!(s.on_open(NodeId(7)).is_ok());
    }

    #[test]
    fn release_default_delegates_to_flush() {
        let s = StubShell::default();
        assert_eq!(s.flush_calls.get(), 0);
        s.release(NodeId(3)).expect("release");
        assert_eq!(
            s.flush_calls.get(),
            1,
            "release default must invoke flush exactly once",
        );
    }

    #[test]
    fn rename_with_options_default_delegates_to_rename_entry() {
        let s = StubShell::default();
        let opts = RenameOptions { no_replace: true };
        // Default impl ignores the options and forwards to
        // `rename_entry` — observe the delegation via the call count.
        s.rename_entry_with_options(NodeId(1), OsStr::new("a"), NodeId(1), OsStr::new("b"), opts)
            .expect("rename");
        assert_eq!(s.rename_calls.get(), 1);
        assert!(opts.no_replace, "RenameOptions field survives copy");
        assert_eq!(
            RenameOptions::default(),
            RenameOptions { no_replace: false }
        );
    }

    #[test]
    fn kind_for_mode_maps_each_file_mode() {
        assert_eq!(kind_for_mode(FileMode::Normal), NodeKind::File);
        assert_eq!(kind_for_mode(FileMode::Executable), NodeKind::File);
        // Gitlinks are surfaced as plain files by the mount.
        assert_eq!(kind_for_mode(FileMode::Gitlink), NodeKind::File);
        assert_eq!(kind_for_mode(FileMode::Symlink), NodeKind::Symlink);
    }
}