haz-vfs 0.2.0

Filesystem abstraction used by the haz task runner.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
//! [`Filesystem`] trait, its supporting value types, and the
//! [`FsError`] variant set every implementation produces.

use std::path::{Path, PathBuf};

use snafu::Snafu;

/// Failure modes shared by every [`Filesystem`] and
/// [`WritableFilesystem`] method.
///
/// Every variant carries a `path` field identifying the entry the
/// failure relates to, so a diagnostic can always name it.
///
/// Each variant's `Display` output begins with strong-LTR ASCII
/// text before any path field, matching the path-rendering rule in
/// `docs/spec/06-paths.md`.
#[derive(Debug, Snafu)]
pub enum FsError {
    /// The path does not exist.
    ///
    /// This is also the variant [`FsError::from_io`] produces for an
    /// I/O error of kind [`std::io::ErrorKind::NotFound`].
    #[snafu(display("path not found at: {}", path.display()))]
    NotFound {
        /// Path that was looked up.
        path: PathBuf,
    },

    /// The entry exists but is not a directory.
    ///
    /// Documented as an error of, among others,
    /// [`Filesystem::read_dir`] and [`WritableFilesystem::fsync_dir`].
    #[snafu(display("path is not a directory at: {}", path.display()))]
    NotADirectory {
        /// Path that was queried.
        path: PathBuf,
    },

    /// The entry exists but is not a regular file.
    ///
    /// Documented as an error of [`Filesystem::read`] and
    /// [`Filesystem::permissions`], both of which follow symlinks
    /// before checking the entry's kind.
    #[snafu(display("path is not a regular file at: {}", path.display()))]
    NotAFile {
        /// Path that was queried.
        path: PathBuf,
    },

    /// The entry exists but is not a symbolic link.
    ///
    /// Documented as an error of [`Filesystem::read_link`].
    #[snafu(display("path is not a symlink at: {}", path.display()))]
    NotASymlink {
        /// Path that was queried.
        path: PathBuf,
    },

    /// Symlink resolution exceeded the implementation's depth cap or
    /// formed a cycle. Both conditions share this variant and its
    /// "symlink loop detected" message.
    #[snafu(display("symlink loop detected at: {}", path.display()))]
    SymlinkLoop {
        /// Path at which the loop or depth-cap was hit.
        path: PathBuf,
    },

    /// The traits require absolute paths. The input was relative.
    #[snafu(display("expected absolute path, got: {}", path.display()))]
    NotAbsolute {
        /// Relative path that was rejected.
        path: PathBuf,
    },

    /// The entry exists but its kind is not one this crate can
    /// classify. Currently used for non-symlink reparse points on
    /// Windows (e.g. dedup, `OneDrive` placeholders) where the
    /// platform's `std::fs::FileType` does not match any
    /// [`EntryKind`] variant.
    #[snafu(display("unknown entry kind at: {}", path.display()))]
    UnknownEntryKind {
        /// Path of the unclassifiable entry.
        path: PathBuf,
    },

    /// I/O failure not covered by another variant.
    ///
    /// The `Display` output appends the underlying error's own
    /// message after the path. [`FsError::from_io`] falls back to
    /// this variant for every error kind other than
    /// [`std::io::ErrorKind::NotFound`].
    #[snafu(display("I/O error at: {}: {source}", path.display()))]
    Io {
        /// Path that triggered the error.
        path: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
}

impl FsError {
    /// Map an [`std::io::Error`] to the most specific [`FsError`]
    /// variant available, attaching `path` for diagnostic context.
    ///
    /// [`std::io::ErrorKind::NotFound`] becomes [`FsError::NotFound`];
    /// every other kind is wrapped in [`FsError::Io`].
    #[must_use]
    pub fn from_io(path: PathBuf, e: std::io::Error) -> Self {
        match e.kind() {
            std::io::ErrorKind::NotFound => FsError::NotFound { path },
            _ => FsError::Io { path, source: e },
        }
    }
}

/// Kind of a filesystem entry.
///
/// Exhaustively enumerates the seven entry kinds POSIX defines (the
/// `S_IF*` bits in `st_mode`). Implementations targeting platforms
/// that cannot produce a given variant simply never return it; for
/// example, [`StdFilesystem`](crate::std_impl::StdFilesystem) on
/// Windows only produces [`EntryKind::Dir`], [`EntryKind::File`], and
/// [`EntryKind::Symlink`], and reports any non-symlink reparse point
/// as [`FsError::UnknownEntryKind`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EntryKind {
    /// Directory. When obtained via [`Filesystem::metadata`] (which
    /// follows symlinks), produced for both real directories and
    /// symlinks whose final target is a directory. When obtained
    /// via [`Filesystem::symlink_metadata`] or
    /// [`Filesystem::read_dir`], produced only for real directories.
    Dir,
    /// Regular file. Same following semantics as [`EntryKind::Dir`]:
    /// via [`Filesystem::metadata`] it also covers symlinks whose
    /// final target is a regular file.
    File,
    /// Symbolic link. Produced only by methods that do not follow
    /// symlinks ([`Filesystem::symlink_metadata`] and
    /// [`Filesystem::read_dir`]). On Windows, NTFS junctions also
    /// surface as [`EntryKind::Symlink`], matching
    /// [`std::fs::FileType::is_symlink`].
    Symlink,
    /// Block device. Produced only on Unix-family platforms; never
    /// returned by [`StdFilesystem`](crate::std_impl::StdFilesystem)
    /// on Windows.
    BlockDevice,
    /// Character device. Produced only on Unix-family platforms.
    CharDevice,
    /// Named pipe (FIFO). Produced only on Unix-family platforms.
    Fifo,
    /// Unix-domain socket. Produced only on Unix-family platforms.
    Socket,
}

/// Lightweight metadata about a filesystem entry.
///
/// Carries the entry's kind and its byte size. Timestamps and
/// permissions are deliberately not part of this struct.
/// Permission bits are read separately through
/// [`Filesystem::permissions`]; callers needing them for
/// restoration use the value recorded in the manifest format
/// (`CACHE-011`) rather than re-querying the host.
///
/// `size` is the regular-file byte length on real filesystems
/// (`std::fs::Metadata::len()`). For non-file kinds the value is
/// implementation-defined on real filesystems (often the inode
/// size on Unix); the in-memory backend reports zero. Cache code
/// only consults `size` when [`Self::kind`] is
/// [`EntryKind::File`], so non-file values do not enter any
/// normative check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FsMetadata {
    /// What kind of entry this is.
    pub kind: EntryKind,
    /// Byte size of the entry. Meaningful for regular files; for
    /// other kinds the value is implementation-defined and should
    /// not be relied on.
    pub size: u64,
}

/// One entry returned by [`Filesystem::read_dir`].
///
/// Carries both the absolute path of the entry and the entry's
/// metadata (without following symlinks), so callers can classify
/// directory contents without issuing a second `stat` per entry.
/// A symlink inside the listed directory therefore shows up with
/// kind [`EntryKind::Symlink`], whatever it points to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirEntry {
    /// Absolute path of the entry.
    pub path: PathBuf,
    /// Metadata of the entry as observed without following symlinks
    /// (i.e., the result of [`Filesystem::symlink_metadata`]).
    pub metadata: FsMetadata,
}

/// Abstract, read-only filesystem used by all of `haz-discovery`.
///
/// Implementations MUST require absolute paths on every method and
/// return [`FsError::NotAbsolute`] when given a relative one.
/// Implementations MUST follow the symlink semantics documented on
/// each method.
///
/// The trait is deliberately minimal. It exposes only the
/// operations that workspace-root walking, glob expansion, and
/// `*.yml` loading need. Phases that require additional operations
/// (write access for the cache, file-watching for incremental
/// rebuilds) SHOULD introduce dedicated traits rather than
/// extending this one, as [`WritableFilesystem`] does for write
/// access.
pub trait Filesystem {
    /// The implementation-specific canonical-path type produced by
    /// [`Filesystem::canonicalize`].
    ///
    /// Each implementation chooses its own type for this associated
    /// item, and the type is constructible only inside the
    /// implementing module (in particular, only by that module's
    /// [`canonicalize`](Filesystem::canonicalize) method). Different
    /// implementations therefore yield distinct canonical-path
    /// types, even when the underlying byte sequence is the same;
    /// this prevents accidental mixing of canonical paths produced
    /// by different filesystems (for example, a `MemFilesystem`'s
    /// test-fixture canonical path can NOT be passed to code
    /// expecting a [`StdFilesystem`]'s host-FS canonical path).
    ///
    /// The bounds let a canonical path be borrowed as a [`Path`],
    /// cloned, compared, hashed (e.g. used as a map key), and
    /// debug-printed.
    ///
    /// [`StdFilesystem`]: crate::StdFilesystem
    type CanonicalPath: AsRef<Path> + Clone + Eq + std::hash::Hash + std::fmt::Debug;

    /// Return metadata for the entry at `path`, FOLLOWING symlinks.
    /// If `path` is a symlink, the returned metadata describes the
    /// target.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if any component of the path does not
    /// exist, [`FsError::SymlinkLoop`] if symlink resolution exceeds
    /// the implementation's depth cap, and
    /// [`FsError::UnknownEntryKind`] if the resolved entry's kind is
    /// not one of [`EntryKind`]'s variants (currently only reachable
    /// for non-symlink Windows reparse points).
    fn metadata(&self, path: &Path) -> Result<FsMetadata, FsError>;

    /// Return metadata for the entry at `path`, WITHOUT following a
    /// trailing symlink. Intermediate symlinks in the path are
    /// followed; only the last component is returned as-is.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if any component of the path does not
    /// exist, [`FsError::SymlinkLoop`] if an intermediate-component
    /// symlink loops or exhausts the depth cap (the trailing symlink
    /// is not followed and therefore cannot itself loop), and
    /// [`FsError::UnknownEntryKind`] if the entry's kind is not one
    /// of [`EntryKind`]'s variants.
    fn symlink_metadata(&self, path: &Path) -> Result<FsMetadata, FsError>;

    /// Read all entries in the directory at `path`. The order of
    /// the returned entries is implementation-defined; callers MUST
    /// NOT depend on a particular order and should sort the result
    /// themselves if they need a stable one.
    ///
    /// Each entry's metadata is returned WITHOUT following symlinks
    /// (matching `std::fs::DirEntry::file_type` behaviour).
    /// Intermediate symlinks in `path` ARE followed when locating
    /// the directory itself.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if `path` does not exist,
    /// [`FsError::NotADirectory`] if `path` is not a directory,
    /// [`FsError::SymlinkLoop`] if an intermediate-component symlink
    /// loops or exhausts the depth cap, and
    /// [`FsError::UnknownEntryKind`] if any listed entry's kind is
    /// not one of [`EntryKind`]'s variants.
    fn read_dir(&self, path: &Path) -> Result<Vec<DirEntry>, FsError>;

    /// Read the entire file at `path` and return its raw bytes.
    /// Symlinks in any component (including the last) are followed.
    ///
    /// Decoding (UTF-8 or otherwise) is the caller's responsibility:
    /// the filesystem layer has no opinion on file contents and
    /// MUST NOT inspect them.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if the file does not exist,
    /// [`FsError::NotAFile`] if `path` (after symlink resolution) is
    /// not a regular file, [`FsError::SymlinkLoop`] if symlink
    /// resolution loops or exhausts the depth cap, or
    /// [`FsError::Io`] for any other I/O failure.
    fn read(&self, path: &Path) -> Result<Vec<u8>, FsError>;

    /// Return the Unix permission bits of the regular file at
    /// `path`, masked to the low 12 bits (the conventional
    /// `S_IRWXU | S_IRWXG | S_IRWXO | S_ISUID | S_ISGID | S_ISVTX`
    /// range). Symlinks in any component (including the last) are
    /// followed, matching [`Self::read`]'s semantics.
    ///
    /// This is the read counterpart of
    /// [`WritableFilesystem::set_permissions`]. The cache layer
    /// records a file's mode at store time so a later
    /// [`WritableFilesystem::set_permissions`] call during
    /// restoration can recreate it; this method is the path callers
    /// take to obtain the mode in the first place.
    ///
    /// On Windows, the returned value is best-effort: the
    /// implementation MAY synthesise a Unix-like mode from the
    /// platform's read-only flag (`0o644` writable / `0o444`
    /// read-only) rather than reading true POSIX bits, matching
    /// the loss-of-fidelity already documented on
    /// [`WritableFilesystem::set_permissions`].
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if the file does not exist,
    /// [`FsError::NotAFile`] if `path` (after symlink resolution) is
    /// not a regular file, [`FsError::SymlinkLoop`] if symlink
    /// resolution loops or exhausts the depth cap, or
    /// [`FsError::Io`] for any other I/O failure.
    fn permissions(&self, path: &Path) -> Result<u32, FsError>;

    /// Resolve all symlinks and `.` / `..` components in `path` and
    /// return the canonical absolute path as the implementation's
    /// [`Self::CanonicalPath`] type. Every intermediate and terminal
    /// symlink is followed; the implementation MUST detect cycles
    /// and depth-cap exhaustion as [`FsError::SymlinkLoop`].
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if any component does not exist after
    /// symlink resolution, and [`FsError::SymlinkLoop`] on cycles or
    /// depth-cap exhaustion.
    fn canonicalize(&self, path: &Path) -> Result<Self::CanonicalPath, FsError>;

    /// Read the target of the symlink at `path`. Does NOT follow
    /// the symlink itself (the link, not its target, is what is
    /// being inspected); intermediate components ARE resolved as
    /// usual.
    ///
    /// The returned target is verbatim: it MAY be either an absolute
    /// path or a path relative to the directory containing `path`.
    /// Implementations MUST NOT canonicalise or otherwise rewrite
    /// the target before returning it; callers that need an absolute
    /// path should resolve the result themselves.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if `path` does not exist,
    /// [`FsError::NotASymlink`] if the entry is not a symlink, and
    /// [`FsError::SymlinkLoop`] if an intermediate-component symlink
    /// loops or exhausts the depth cap.
    fn read_link(&self, path: &Path) -> Result<PathBuf, FsError>;
}

/// Mutating operations needed by callers that persist state on the
/// filesystem (today: the cache layer in `haz-cache`).
///
/// Declared as a sub-trait of [`Filesystem`] rather than folded
/// into it: most callers (workspace discovery, configuration
/// parsing) only need reads, and a smaller surface is easier to
/// reason about. The [`Filesystem`] supertrait bound means every
/// implementation of this trait also implements [`Filesystem`], so
/// a single handle covers both read and write concerns for callers
/// that need them.
///
/// Methods take `&self`, mirroring [`Filesystem`]. Implementations
/// that need to mutate internal state (e.g. an in-memory backend)
/// MUST use interior mutability.
///
/// Path semantics: every method requires absolute paths and returns
/// [`FsError::NotAbsolute`] for relative inputs, matching
/// [`Filesystem`]'s convention.
///
/// Overwrite semantics: [`Self::write_file`] and [`Self::rename`]
/// MAY replace an existing entry. Callers that need fail-on-exists
/// semantics are responsible for pre-checking; the trait does not
/// offer it because the cache's two-phase store (write fresh files
/// in a tmp dir, then [`Self::rename`] into place) does not need
/// it.
///
/// Durability semantics: [`Self::write_file`] and [`Self::rename`]
/// are NOT durable on their own. Callers that need durability
/// across power loss MUST follow them with [`Self::fsync_file`]
/// (for the written file) and/or [`Self::fsync_dir`] (for the
/// parent directory of a renamed entry). On in-memory backends the
/// fsync methods are no-ops.
pub trait WritableFilesystem: Filesystem {
    /// Create the directory at `path` along with any missing
    /// intermediate components. Idempotent: succeeds when `path`
    /// already exists as a directory.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotADirectory`] if any existing component along
    /// the path is not a directory, and [`FsError::Io`] for any
    /// other I/O failure.
    fn create_dir_all(&self, path: &Path) -> Result<(), FsError>;

    /// Write `contents` to `path`, overwriting any existing file.
    /// The immediate parent directory MUST already exist; the
    /// method does NOT create it (use [`Self::create_dir_all`]
    /// beforehand).
    ///
    /// The write is not durable by itself; follow it with
    /// [`Self::fsync_file`] when durability is required.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if the parent directory does not
    /// exist, [`FsError::NotADirectory`] if the parent is not a
    /// directory, and [`FsError::Io`] for any other I/O failure.
    fn write_file(&self, path: &Path, contents: &[u8]) -> Result<(), FsError>;

    /// Rename `from` to `to`. On Unix this is `rename(2)`; on
    /// Windows it is `MoveFileExW` with `MOVEFILE_REPLACE_EXISTING`.
    /// When `to` exists, it is atomically replaced (subject to the
    /// host platform's guarantees; same filesystem only on Unix).
    ///
    /// The cache's two-phase store depends on this method being
    /// atomic on a single filesystem: a partially-written entry MUST
    /// NOT become visible to a concurrent lookup.
    ///
    /// The rename is not durable by itself; follow it with
    /// [`Self::fsync_dir`] on the parent directory when durability
    /// is required.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if either path is relative,
    /// [`FsError::NotFound`] if `from` does not exist, and
    /// [`FsError::Io`] for any other I/O failure (e.g. cross-device
    /// rename).
    fn rename(&self, from: &Path, to: &Path) -> Result<(), FsError>;

    /// Recursively remove `path` and every entry below it.
    /// NOT idempotent on missing paths: a `path` that does not
    /// exist returns [`FsError::NotFound`]. Callers that want
    /// "remove if exists" semantics should either treat
    /// [`FsError::NotFound`] as success or pre-check with
    /// [`Filesystem::metadata`].
    ///
    /// Symlinks encountered during the walk are removed as links;
    /// the targets they point to are NOT followed and NOT removed.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if `path` does not exist, and
    /// [`FsError::Io`] for any other I/O failure.
    fn remove_dir_all(&self, path: &Path) -> Result<(), FsError>;

    /// Set the Unix permission bits on `path` to `mode` (the low
    /// 12 bits of `mode` correspond to the POSIX `S_IRWXU`,
    /// `S_IRWXG`, `S_IRWXO`, `S_ISUID`, `S_ISGID`, `S_ISVTX` bits).
    /// This is the write counterpart of [`Filesystem::permissions`].
    ///
    /// On Windows this is best-effort: the platform's permission
    /// model does not map cleanly to Unix bits, so the
    /// implementation MAY honour only the read-only flag (mapped
    /// from owner-write). Callers MUST NOT depend on round-trip
    /// fidelity of arbitrary Unix bits across platforms.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if `path` does not exist, and
    /// [`FsError::Io`] for any other I/O failure.
    fn set_permissions(&self, path: &Path, mode: u32) -> Result<(), FsError>;

    /// Flush the contents and metadata of the file at `path` to
    /// durable storage. On Unix this is `fsync(2)` on an
    /// `open(2)`-ed descriptor; on Windows this is
    /// `FlushFileBuffers`. On in-memory implementations this is a
    /// documented no-op.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if `path` does not exist, and
    /// [`FsError::Io`] for any other I/O failure.
    fn fsync_file(&self, path: &Path) -> Result<(), FsError>;

    /// Flush the entry list of the directory at `path` to durable
    /// storage. POSIX requires this after `rename(2)` to make the
    /// rename itself durable across power loss. On Windows this
    /// MAY be a no-op (the platform does not expose the operation
    /// for directories). On in-memory implementations this is a
    /// documented no-op.
    ///
    /// # Errors
    ///
    /// Returns [`FsError::NotAbsolute`] if `path` is relative,
    /// [`FsError::NotFound`] if `path` does not exist,
    /// [`FsError::NotADirectory`] if `path` is not a directory,
    /// and [`FsError::Io`] for any other I/O failure.
    fn fsync_dir(&self, path: &Path) -> Result<(), FsError>;
}