Skip to main content

seshat_cli/
dangerous_path.rs

1//! Dangerous-path denylist for `seshat serve`.
2//!
3//! When `serve` is invoked from a denylisted directory and there is no git
4//! repository nearby, it would otherwise try to scan and recursively watch a
5//! huge tree (`$HOME`, `/`, …), which `notify-debouncer-full` translates into
6//! tens of GB of memory growth. `is_dangerous_cwd` returns `true` if a path
7//! matches the per-OS built-in denylist or any user-supplied additional entry.
8//!
9//! Comparison rules:
10//! - Both candidate and denylist entries are canonicalized via
11//!   [`std::fs::canonicalize`]; symlinks resolve.
12//! - Matching is component-wise via [`std::path::Path::starts_with`], so `/var2` does
13//!   not match `/var`.
14//! - On macOS and Windows the comparison is case-insensitive (lowercased via
15//!   `to_string_lossy().to_lowercase()`); on Linux it is byte-exact.
16//! - Built-in entries that don't resolve on this machine are silently skipped.
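//! - Malformed `additional` entries (relative paths, or entries starting with
//!   `~`, `$`, or `%`, which are never expanded) are skipped with a
//!   `tracing::warn!` log; absolute entries that cannot be canonicalized are
//!   skipped with a `tracing::debug!` log.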
19
20use std::path::{Path, PathBuf};
21
22/// Check whether `path` is a dangerous cwd — equal to or a descendant of a
23/// built-in (per-OS) or user-supplied denylist entry.
24///
25/// See the module-level docs for full matching rules.
26pub fn is_dangerous_cwd(path: &Path, additional: &[String]) -> bool {
27    let home = dirs::home_dir();
28    if home.is_none() {
29        // Stripped env (systemd unit, container, sandbox without HOME/USERPROFILE/passwd):
30        // every $HOME-derived denylist entry silently vanishes. Warn loudly so operators
31        // understand why an obviously-dangerous cwd may not be flagged. We do NOT fail
32        // closed here: the absolute entries (`/`, `/var`, drive roots, …) still apply,
33        // and failing closed would break legitimate use from non-home, non-system trees.
34        tracing::warn!(
35            "could not resolve home directory; \
36             $HOME-derived dangerous-cwd entries are inactive for this invocation"
37        );
38    }
39    is_dangerous_cwd_with_home(path, additional, home.as_deref())
40}
41
42/// Test-injectable variant of [`is_dangerous_cwd`] that takes an explicit
43/// home directory instead of resolving via [`dirs::home_dir`].
44pub(crate) fn is_dangerous_cwd_with_home(
45    path: &Path,
46    additional: &[String],
47    home: Option<&Path>,
48) -> bool {
49    let canonical_candidate = canonicalize_or_self(path);
50    let builtin = builtin_denylist(home);
51    is_dangerous_inner(&canonical_candidate, additional, &builtin)
52}
53
54/// Returns `true` when `path` (canonicalized) is EQUAL to a built-in or
55/// user-supplied denylist entry — not merely a descendant of one.
56///
57/// Used by `db::check_serve_dangerous_cwd` and `db::check_repo_override_dangerous`
58/// to detect a stray `.git` at a dangerous root (e.g. `~/.git` for dotfiles
59/// users). When `find_git_root` walks up from a non-git cwd inside `$HOME`
60/// and lands on `$HOME/.git`, the resolved git root IS the dangerous root
61/// itself — not a real project — so the guard must continue to refuse.
62///
63/// Distinct from [`is_dangerous_cwd_with_home`]: that one returns `true`
64/// for both exact matches AND descendants; this one is exact-only.
65pub(crate) fn is_exact_denylist_entry(
66    path: &Path,
67    additional: &[String],
68    home: Option<&Path>,
69) -> bool {
70    let canonical = canonicalize_or_self(path);
71    let builtin = builtin_denylist(home);
72    if builtin.iter().any(|entry| paths_equal(&canonical, entry)) {
73        return true;
74    }
75    for raw in additional {
76        let trimmed = raw.trim_start();
77        if trimmed.starts_with('~') || trimmed.starts_with('$') || trimmed.starts_with('%') {
78            continue;
79        }
80        let entry_path = Path::new(raw);
81        if !entry_path.is_absolute() {
82            continue;
83        }
84        let Ok(canonical_entry) = std::fs::canonicalize(entry_path) else {
85            continue;
86        };
87        if paths_equal(&canonical, &canonical_entry) {
88            return true;
89        }
90    }
91    false
92}
93
/// Path equality using the same case-folding rules as [`path_matches`].
///
/// On macOS and Windows both paths are lowercased (via
/// `to_string_lossy().to_lowercase()`) before comparing; elsewhere the
/// comparison is case-sensitive. On every platform the comparison is done on
/// `Path` values, i.e. component-wise, so a trailing separator does not make
/// two otherwise identical paths unequal. (Comparing the lowercased *strings*
/// directly would treat `/var/` and `/var` as different on macOS/Windows while
/// the other platforms — and `path_matches` — treat them as the same path.)
fn paths_equal(a: &Path, b: &Path) -> bool {
    #[cfg(any(target_os = "macos", target_os = "windows"))]
    {
        let folded_a = a.to_string_lossy().to_lowercase();
        let folded_b = b.to_string_lossy().to_lowercase();
        // Re-wrap as `Path` so equality is by components, not raw bytes.
        Path::new(&folded_a) == Path::new(&folded_b)
    }
    #[cfg(not(any(target_os = "macos", target_os = "windows")))]
    {
        a == b
    }
}
105
106/// Shared implementation: `candidate_canonical` is matched against `builtin`
107/// followed by any absolute, resolvable entries in `additional`.
108fn is_dangerous_inner(
109    candidate_canonical: &Path,
110    additional: &[String],
111    builtin: &[PathBuf],
112) -> bool {
113    for entry in builtin {
114        if path_matches(candidate_canonical, entry) {
115            return true;
116        }
117    }
118
119    for raw in additional {
120        // Catch common misconfigurations that silently fail otherwise:
121        // tilde and env-var prefixes are NOT expanded (per the field's
122        // doc comment in `ScanConfig`). Warn the user so the silent-skip
123        // doesn't read as "I told it about /tmp but it ignored me".
124        let trimmed = raw.trim_start();
125        if trimmed.starts_with('~') || trimmed.starts_with('$') || trimmed.starts_with('%') {
126            tracing::warn!(
127                entry = %raw,
128                "additional_denylist_paths entry uses tilde or env-var syntax; \
129                 these are NOT expanded — use an absolute path instead — skipping"
130            );
131            continue;
132        }
133        let entry_path = Path::new(raw);
134        if !entry_path.is_absolute() {
135            tracing::warn!(
136                entry = %raw,
137                "additional_denylist_paths entry is not an absolute path; skipping"
138            );
139            continue;
140        }
141        let Ok(canonical) = std::fs::canonicalize(entry_path) else {
142            // Non-existent / unreadable entries are silent per spec
143            // (see PRD US-001 AC: "Denylist entries that don't exist on
144            // the current machine are silently skipped"). We still trace
145            // at debug for diagnosis but do not warn.
146            tracing::debug!(
147                entry = %raw,
148                "additional_denylist_paths entry could not be canonicalized; skipping"
149            );
150            continue;
151        };
152        if path_matches(candidate_canonical, &canonical) {
153            return true;
154        }
155    }
156
157    false
158}
159
/// Resolve `path` with [`std::fs::canonicalize`]; if that fails (for instance
/// because the path does not exist on disk), return an owned copy of `path`
/// exactly as it was given.
fn canonicalize_or_self(path: &Path) -> PathBuf {
    match std::fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
165
/// Append the canonical form of `path` to `out` when it can be resolved on
/// this machine; when canonicalization fails, `out` is left untouched and
/// nothing is reported.
fn push_canonical(out: &mut Vec<PathBuf>, path: &Path) {
    // `Option` iterates over zero or one item, so a failed lookup adds nothing.
    out.extend(std::fs::canonicalize(path).ok());
}
173
/// Component-wise prefix test: `true` when `candidate` is `entry` itself or
/// sits somewhere beneath it (so `/var2` is not matched by `/var`).
///
/// When compiled for macOS or Windows both sides are lowercased first (via
/// `to_string_lossy().to_lowercase()`), making the test case-insensitive; on
/// other targets the paths are compared as given.
fn path_matches(candidate: &Path, entry: &Path) -> bool {
    if cfg!(any(target_os = "macos", target_os = "windows")) {
        // Fold case on the lossy string form, then compare as paths again so
        // the prefix test stays component-wise.
        let fold = |p: &Path| PathBuf::from(p.to_string_lossy().to_lowercase());
        fold(candidate).starts_with(fold(entry))
    } else {
        candidate.starts_with(entry)
    }
}
189
/// Built-in denylist for macOS.
///
/// Contains, in order: `home` and a fixed set of its well-known
/// subdirectories (only when `home` is `Some`), followed by a fixed set of
/// absolute system locations. Every entry goes through `push_canonical`, so
/// anything that does not resolve on this machine is left out.
///
/// NOTE(review): `/` is one of the absolute entries, and the matcher in this
/// file treats descendants of an entry as matches, so every canonical absolute
/// path appears to match — confirm that this is intended.
#[cfg(target_os = "macos")]
fn builtin_denylist(home: Option<&Path>) -> Vec<PathBuf> {
    const HOME_SUBDIRS: &[&str] = &[
        "Library",
        "Documents",
        "Downloads",
        "Desktop",
        "Pictures",
        "Movies",
        "Music",
        "Public",
        ".config",
        ".cache",
    ];
    const SYSTEM_ROOTS: &[&str] = &[
        "/",
        "/Users",
        "/Applications",
        "/System",
        "/Library",
        "/private",
        "/tmp",
        "/var",
        "/usr",
        "/etc",
        "/opt",
        // External-volume mounts: a 1 TB drive at `/Volumes/Photos`
        // would reproduce the original 90+ GB recursive-walk leak.
        "/Volumes",
        "/Network",
    ];

    let mut denylist = Vec::new();

    if let Some(home_dir) = home {
        // The home directory itself comes first, then its subdirectories.
        push_canonical(&mut denylist, home_dir);
        for &name in HOME_SUBDIRS {
            push_canonical(&mut denylist, &home_dir.join(name));
        }
    }

    for &root in SYSTEM_ROOTS {
        push_canonical(&mut denylist, Path::new(root));
    }

    denylist
}
236
237#[cfg(target_os = "linux")]
238fn builtin_denylist(home: Option<&Path>) -> Vec<PathBuf> {
239    let mut entries = Vec::new();
240    if let Some(h) = home {
241        push_canonical(&mut entries, h);
242    }
243    for absolute in [
244        "/", "/home", "/etc", "/var", "/tmp", "/usr", "/opt", "/root", "/proc", "/sys", "/dev",
245        // External / pseudo / package mounts that can hide huge trees:
246        "/mnt", "/media", "/run", "/snap", "/srv", "/boot",
247    ] {
248        push_canonical(&mut entries, Path::new(absolute));
249    }
250    for (env_var, fallback_sub) in [
251        ("XDG_CONFIG_HOME", Some(".config")),
252        ("XDG_CACHE_HOME", Some(".cache")),
253        ("XDG_DATA_HOME", Some(".local/share")),
254        // No fallback: XDG_RUNTIME_DIR has no spec'd default — only
255        // include it when the env var is set (and absolute / non-empty).
256        ("XDG_RUNTIME_DIR", None),
257    ] {
258        let env_path = std::env::var_os(env_var)
259            .map(PathBuf::from)
260            // Empty / relative env values would canonicalize against cwd
261            // and pollute the denylist with arbitrary paths — skip them.
262            .filter(|p| !p.as_os_str().is_empty() && p.is_absolute());
263        let path = env_path.or_else(|| fallback_sub.and_then(|sub| home.map(|h| h.join(sub))));
264        if let Some(p) = path {
265            push_canonical(&mut entries, &p);
266        }
267    }
268    entries
269}
270
/// Built-in denylist for Windows.
///
/// Contains, in order: `home` and a few of its well-known subdirectories
/// (only when `home` is `Some`), system and per-user locations read from
/// environment variables, hardcoded system-directory fallbacks, and every
/// drive root `A:\` through `Z:\`. Every entry goes through `push_canonical`,
/// so anything that does not resolve on this machine is left out.
///
/// NOTE(review): drive roots are entries, and the matcher in this file treats
/// descendants of an entry as matches, so every path on an existing drive
/// appears to match — confirm that this is intended.
#[cfg(target_os = "windows")]
fn builtin_denylist(home: Option<&Path>) -> Vec<PathBuf> {
    const HOME_SUBDIRS: &[&str] = &["Documents", "Downloads", "Desktop"];
    // System paths via env (handles non-default install drive / locale):
    // - %SystemRoot%        : typically C:\Windows
    // - %ProgramFiles%      : typically C:\Program Files
    // - %ProgramFiles(x86)% : typically C:\Program Files (x86)
    // - %ProgramData%       : typically C:\ProgramData
    // - %APPDATA%, %LOCALAPPDATA%, %TEMP% : per-user roaming/local/temp
    const SYSTEM_ENV_VARS: &[&str] = &[
        "SystemRoot",
        "ProgramFiles",
        "ProgramFiles(x86)",
        "ProgramData",
        "APPDATA",
        "LOCALAPPDATA",
        "TEMP",
    ];
    // Hardcoded fallbacks for when the env vars are unset (rare on Windows,
    // but possible in service / SYSTEM contexts).
    const SYSTEM_FALLBACKS: &[&str] = &[
        r"C:\Windows",
        r"C:\Program Files",
        r"C:\Program Files (x86)",
        r"C:\ProgramData",
    ];

    let mut denylist = Vec::new();

    if let Some(home_dir) = home {
        // The home directory itself comes first, then its subdirectories.
        push_canonical(&mut denylist, home_dir);
        for &name in HOME_SUBDIRS {
            push_canonical(&mut denylist, &home_dir.join(name));
        }
    }

    for &var in SYSTEM_ENV_VARS {
        match std::env::var_os(var) {
            Some(value) if !value.is_empty() => {
                push_canonical(&mut denylist, Path::new(&value));
            }
            // Unset or empty variables contribute nothing.
            _ => {}
        }
    }

    for &dir in SYSTEM_FALLBACKS {
        push_canonical(&mut denylist, Path::new(dir));
    }

    // Drive roots A:\..Z:\ — only drives that canonicalize (i.e. exist) are
    // kept. The original author notes this is meant to avoid hitting
    // disconnected network drives, on the expectation that
    // `std::fs::canonicalize` fails fast for them (not verified here).
    for letter in 'A'..='Z' {
        let root = format!(r"{}:\", letter);
        push_canonical(&mut denylist, Path::new(&root));
    }

    denylist
}
325
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fresh temporary directory; removed again when the returned guard drops.
    fn scratch_dir() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Render `path` the way it would appear in an `additional` list.
    fn as_entry(path: &Path) -> String {
        path.to_string_lossy().into_owned()
    }

    /// Whether a brand-new temp dir is flagged when only `additional` is in
    /// play (the built-in list is empty, so the result is host-independent).
    fn fresh_dir_flagged_by(additional: &[String]) -> bool {
        let dir = scratch_dir();
        let candidate = canonicalize_or_self(dir.path());
        is_dangerous_inner(&candidate, additional, &[])
    }

    // ----- path_matches: pure logic, never touches the filesystem -----

    #[test]
    fn path_matches_exact() {
        let var = Path::new("/var");
        assert!(path_matches(var, var));
    }

    #[test]
    fn path_matches_descendant() {
        let (child, parent) = (Path::new("/var/foo"), Path::new("/var"));
        assert!(path_matches(child, parent));
    }

    #[test]
    fn path_matches_deep_descendant() {
        let (deep, ancestor) = (Path::new("/var/foo/bar/baz"), Path::new("/var"));
        assert!(path_matches(deep, ancestor));
    }

    #[test]
    fn path_matches_sibling_var2_is_not_var() {
        // Whole components are compared, so `var2` and `var` never match
        // each other in either direction.
        for (candidate, entry) in [
            ("/var2", "/var"),
            ("/var2/sub", "/var"),
            ("/var/foo", "/var2"),
        ] {
            assert!(
                !path_matches(Path::new(candidate), Path::new(entry)),
                "{candidate} must not match {entry}"
            );
        }
    }

    #[test]
    fn path_matches_unrelated_root_is_not_matched() {
        let (candidate, entry) = (Path::new("/etc"), Path::new("/var"));
        assert!(!path_matches(candidate, entry));
    }

    #[cfg(any(target_os = "macos", target_os = "windows"))]
    #[test]
    fn path_matches_case_insensitive_on_macos_windows() {
        for (candidate, entry) in [
            ("/Users/Foo", "/users/foo"),
            ("/USERS/FOO/bar", "/Users/Foo"),
        ] {
            assert!(
                path_matches(Path::new(candidate), Path::new(entry)),
                "{candidate} should match {entry} ignoring case"
            );
        }
    }

    #[cfg(target_os = "linux")]
    #[test]
    fn path_matches_case_sensitive_on_linux() {
        let (candidate, entry) = (Path::new("/Users/Foo"), Path::new("/users/foo"));
        assert!(!path_matches(candidate, entry));
    }

    // ----- is_dangerous_inner: caller-controlled builtin list (OS-agnostic) -----

    #[test]
    fn additional_absolute_entry_matches() {
        let dir = scratch_dir();
        let candidate = canonicalize_or_self(dir.path());
        let additional = [as_entry(dir.path())];
        assert!(is_dangerous_inner(&candidate, &additional, &[]));
    }

    #[test]
    fn additional_subdir_match() {
        let dir = scratch_dir();
        let nested = dir.path().join("sub");
        std::fs::create_dir(&nested).unwrap();
        let candidate = canonicalize_or_self(&nested);
        let additional = [as_entry(dir.path())];
        assert!(is_dangerous_inner(&candidate, &additional, &[]));
    }

    #[test]
    fn relative_additional_entry_skipped_with_warn() {
        // The only entry is relative: it is warn-logged and skipped, and with
        // nothing else to match against the candidate is not flagged.
        assert!(!fresh_dir_flagged_by(&["relative/path".to_string()]));
    }

    #[test]
    fn unresolvable_additional_entry_silently_skipped() {
        assert!(!fresh_dir_flagged_by(&[
            "/does/not/exist/xyzzy/seshat-test".to_string()
        ]));
    }

    #[test]
    fn tilde_prefix_in_additional_is_skipped() {
        // Tilde (~) and env-var ($VAR/%VAR%) prefixes are intentionally not
        // expanded — they would canonicalize against the cwd and pollute the
        // denylist with arbitrary paths — so such an entry must be skipped.
        assert!(!fresh_dir_flagged_by(&["~/scratch".to_string()]));
    }

    #[test]
    fn env_var_prefix_in_additional_is_skipped() {
        let additional = [
            "$HOME/scratch".to_string(),
            "%USERPROFILE%\\scratch".to_string(),
        ];
        assert!(!fresh_dir_flagged_by(&additional));
    }

    #[test]
    fn no_home_falls_back_to_absolute_entries_only() {
        // `home: None` stands in for a stripped environment (systemd unit,
        // sandbox). The absolute entries (e.g. `/`, `/var`) remain active, so
        // a candidate matching one of them is still flagged. No absolute path
        // is known to be canonical on every host platform, so the result is
        // discarded: this only checks that the call does not panic.
        let dir = scratch_dir();
        let _ = is_dangerous_cwd_with_home(dir.path(), &[], None);
    }

    // ----- is_dangerous_cwd_with_home: injected home directory -----

    #[test]
    fn home_dir_itself_is_dangerous() {
        let dir = scratch_dir();
        assert!(is_dangerous_cwd_with_home(
            dir.path(),
            &[],
            Some(dir.path())
        ));
    }

    #[test]
    fn subdir_under_injected_home_is_dangerous() {
        let dir = scratch_dir();
        let project = dir.path().join("subproj");
        std::fs::create_dir(&project).unwrap();
        assert!(is_dangerous_cwd_with_home(
            &project,
            &[],
            Some(dir.path())
        ));
    }

    #[cfg(unix)]
    #[test]
    fn symlink_to_dangerous_is_resolved() {
        let dir = scratch_dir();
        let real_home = dir.path().join("real_home");
        std::fs::create_dir(&real_home).unwrap();
        let alias = dir.path().join("link_to_home");
        std::os::unix::fs::symlink(&real_home, &alias).unwrap();
        // With `real_home` acting as the home directory, the symlink must
        // canonicalize to it and therefore match.
        assert!(is_dangerous_cwd_with_home(
            &alias,
            &[],
            Some(real_home.as_path())
        ));
    }

    #[test]
    fn malformed_additional_does_not_panic_or_alter_result() {
        let dir = scratch_dir();
        // The candidate is the injected home, so it is dangerous regardless;
        // the relative additional entry must neither panic nor change that.
        let additional = ["relative/skipped".to_string()];
        assert!(is_dangerous_cwd_with_home(
            dir.path(),
            &additional,
            Some(dir.path())
        ));
    }

    // ----- builtin_denylist coverage -----

    #[test]
    fn builtin_denylist_contains_injected_home() {
        let dir = scratch_dir();
        let denylist = builtin_denylist(Some(dir.path()));
        let expected = std::fs::canonicalize(dir.path()).unwrap();
        assert!(
            denylist.contains(&expected),
            "builtin_denylist must include the injected home directory"
        );
    }

    #[cfg(target_os = "linux")]
    #[test]
    fn linux_xdg_config_fallback_when_env_unset_or_set() {
        // Regardless of whether the host sets XDG_CONFIG_HOME, `.config` under
        // the injected home has to be covered — through the `.config`
        // fallback or simply through the home entry itself.
        let dir = scratch_dir();
        let config_dir = dir.path().join(".config");
        std::fs::create_dir(&config_dir).unwrap();
        let expected = std::fs::canonicalize(&config_dir).unwrap();
        let denylist = builtin_denylist(Some(dir.path()));
        assert!(
            denylist.iter().any(|known| expected.starts_with(known)),
            "~/.config must be covered by the Linux denylist"
        );
    }

    #[cfg(target_os = "macos")]
    #[test]
    fn macos_builtin_includes_library_under_injected_home() {
        let dir = scratch_dir();
        let library = dir.path().join("Library");
        std::fs::create_dir(&library).unwrap();
        let expected = std::fs::canonicalize(&library).unwrap();
        let denylist = builtin_denylist(Some(dir.path()));
        assert!(denylist.contains(&expected));
    }

    // ----- public entry point smoke test -----

    #[test]
    fn public_is_dangerous_cwd_does_not_panic() {
        // Whether the host's actual cwd counts as dangerous is unpredictable;
        // the only requirement is that the public entry point does not panic.
        let _ = is_dangerous_cwd(Path::new("."), &[]);
    }
}
553}