Skip to main content

lean_ctx/core/
pathutil.rs

1use std::path::{Path, PathBuf};
2
3/// Canonicalize a path and strip the Windows verbatim/extended-length prefix (`\\?\`)
4/// that `std::fs::canonicalize` adds on Windows. This prefix breaks many tools and
5/// string-based path comparisons.
6///
7/// On non-Windows platforms this is equivalent to `std::fs::canonicalize`.
8pub fn safe_canonicalize(path: &Path) -> std::io::Result<PathBuf> {
9    let canon = std::fs::canonicalize(path)?;
10    Ok(strip_verbatim(canon))
11}
12
13/// Like `safe_canonicalize` but returns the original path on failure.
14pub fn safe_canonicalize_or_self(path: &Path) -> PathBuf {
15    safe_canonicalize(path).unwrap_or_else(|_| path.to_path_buf())
16}
17
18/// Canonicalize with a timeout guard. Protects against hangs on WSL2 DrvFS,
19/// Windows reparse points, NFS, FUSE, sshfs, and other slow filesystems.
20/// Falls back to the original path if canonicalize doesn't complete within the timeout.
21/// Self-healing: after a timeout, subsequent calls to slow mounts skip the thread entirely.
22pub fn safe_canonicalize_bounded(path: &Path, timeout_ms: u64) -> PathBuf {
23    use super::io_health;
24
25    let path_str = path.to_string_lossy();
26    if io_health::is_slow_mount(&path_str) && io_health::recent_freeze_count() > 0 {
27        return safe_canonicalize_or_self(path);
28    }
29
30    let effective_timeout =
31        io_health::adaptive_timeout(std::time::Duration::from_millis(timeout_ms));
32
33    let path_owned = path.to_path_buf();
34    let (tx, rx) = std::sync::mpsc::channel();
35    let _ = std::thread::Builder::new()
36        .name("canonicalize-bounded".into())
37        .spawn(move || {
38            let result = safe_canonicalize(&path_owned).unwrap_or(path_owned);
39            let _ = tx.send(result);
40        });
41    if let Ok(canonical) = rx.recv_timeout(effective_timeout) {
42        canonical
43    } else {
44        io_health::record_freeze();
45        tracing::warn!(
46            "[SECURITY] canonicalize timed out ({}ms) for {}; PathJail checks on \
47             uncanonicalized paths may be less reliable",
48            effective_timeout.as_millis(),
49            path.display()
50        );
51        path.to_path_buf()
52    }
53}
54
55/// Remove the `\\?\` / `//?/` verbatim prefix from a `PathBuf`.
56/// Handles both regular verbatim (`\\?\C:\...`) and UNC verbatim (`\\?\UNC\...`).
57pub fn strip_verbatim(path: PathBuf) -> PathBuf {
58    let s = path.to_string_lossy();
59    if let Some(stripped) = strip_verbatim_str(&s) {
60        PathBuf::from(stripped)
61    } else {
62        path
63    }
64}
65
/// String-level counterpart of `strip_verbatim`.
///
/// Backslashes are first rewritten to forward slashes, so `\\?\` and `//?/`
/// are treated alike. Then:
/// - `//?/UNC/server/share` becomes `//server/share`;
/// - `//?/C:/dir` becomes `C:/dir`.
///
/// Returns `Some(cleaned)` (always forward-slash separated) when a verbatim
/// prefix was present, and `None` when the input carries no such prefix.
pub fn strip_verbatim_str(path: &str) -> Option<String> {
    let forward = path.replace('\\', "/");
    let tail = forward.strip_prefix("//?/")?;

    Some(match tail.strip_prefix("UNC/") {
        // UNC form: keep the leading double slash of a network path.
        Some(share) => format!("//{share}"),
        None => tail.to_string(),
    })
}
79
80/// Normalize paths from any client format to a consistent OS-native form.
81/// Handles MSYS2/Git Bash (`/c/Users/...` -> `C:/Users/...`), mixed separators,
82/// double slashes, and trailing slashes. Uses forward slashes for consistency.
83pub fn normalize_tool_path(path: &str) -> String {
84    let mut p = match strip_verbatim_str(path) {
85        Some(stripped) => stripped,
86        None => path.to_string(),
87    };
88
89    // MSYS2/Git Bash: /c/Users/... -> C:/Users/...
90    if p.len() >= 3
91        && p.starts_with('/')
92        && p.as_bytes()[1].is_ascii_alphabetic()
93        && p.as_bytes()[2] == b'/'
94    {
95        let drive = p.as_bytes()[1].to_ascii_uppercase() as char;
96        p = format!("{drive}:{}", &p[2..]);
97    }
98
99    p = p.replace('\\', "/");
100
101    // Collapse double slashes (preserve UNC paths starting with //)
102    while p.contains("//") && !p.starts_with("//") {
103        p = p.replace("//", "/");
104    }
105
106    // Remove trailing slash (unless root like "/" or "C:/")
107    if p.len() > 1 && p.ends_with('/') && !p.ends_with(":/") {
108        p.pop();
109    }
110
111    // Resolve symlinks for absolute paths to ensure cache key consistency.
112    // Skip relative paths (preserve "." / "../" as-is), root-only paths (/ or C:/),
113    // and slow mounts (WSL DrvFS /mnt/) where canonicalize can hang.
114    // Uses safe_canonicalize to strip Windows \\?\ prefix.
115    let is_absolute = p.starts_with('/') || (p.len() >= 3 && p.as_bytes()[1] == b':');
116    let is_root_only = p == "/" || (p.len() <= 3 && p.ends_with('/') && is_absolute);
117    if is_absolute && !is_root_only && !crate::core::io_health::is_slow_mount(&p) {
118        if let Ok(canonical) = safe_canonicalize(Path::new(&*p)) {
119            let canonical_str = canonical.to_string_lossy().replace('\\', "/");
120            if !canonical_str.is_empty() {
121                p = canonical_str;
122            }
123        }
124    }
125
126    p
127}
128
129/// Returns `true` if the directory is too broad to be a valid project root.
130/// Rejects home directory, filesystem root, `.` (bare CWD), and agent sandbox
131/// directories (`.claude`, `.codex`). Used to prevent writing project-scoped
132/// data (overlays, policies) into the global `~/.lean-ctx/` data directory.
133pub fn is_broad_or_unsafe_root(dir: &Path) -> bool {
134    if let Some(home) = dirs::home_dir() {
135        if dir == home {
136            return true;
137        }
138    }
139    let s = dir.to_string_lossy();
140    if s == "/" || s == "\\" || s == "." {
141        return true;
142    }
143    s.ends_with("/.claude")
144        || s.ends_with("/.codex")
145        || s.contains("/.claude/")
146        || s.contains("/.codex/")
147}
148
/// File and directory names whose presence directly inside a directory marks
/// that directory as a project root (see `has_project_marker`).
pub const PROJECT_MARKERS: &[&str] = &[
    // Version control
    ".git",
    // Language / build-system manifests
    "Cargo.toml",
    "package.json",
    "go.mod",
    "pyproject.toml",
    "setup.py",
    "pom.xml",
    "build.gradle",
    "Makefile",
    // Tool-specific markers
    ".lean-ctx.toml",
    ".planning",
];
163
164/// Returns `true` if `dir` contains at least one known project marker.
165pub fn has_project_marker(dir: &Path) -> bool {
166    PROJECT_MARKERS.iter().any(|m| dir.join(m).exists())
167}
168
169/// Returns `true` if `dir` is the home directory or one of the macOS "magic"
170/// home subdirectories (`Documents`, `Desktop`, `Downloads`).
171///
172/// macOS guards these with TCC: the first time a process *enumerates or stats
173/// inside* one, the OS pops a privacy prompt ("lean-ctx would like to access
174/// files in your Documents folder", #356). They are also never valid project
175/// roots or multi-repo workspace parents, so scan heuristics should treat them
176/// as off-limits *without* calling `read_dir` (which is what trips the prompt).
177pub fn is_tcc_sensitive_home_dir(dir: &Path) -> bool {
178    let Some(home) = dirs::home_dir() else {
179        return false;
180    };
181    if dir == home {
182        return true;
183    }
184    if dir.parent() != Some(home.as_path()) {
185        return false;
186    }
187    matches!(
188        dir.file_name().and_then(|n| n.to_str()),
189        Some("Documents" | "Desktop" | "Downloads")
190    )
191}
192
193/// Returns `true` if `dir` is a multi-repo workspace parent — i.e. it has at
194/// least 2 immediate child directories that each contain a project marker.
195pub fn has_multi_repo_children(dir: &Path) -> bool {
196    // Never enumerate the home dir or macOS TCC-protected dirs: read_dir there
197    // pops a macOS privacy prompt (#356) and they are never workspace parents.
198    if is_tcc_sensitive_home_dir(dir) {
199        return false;
200    }
201    let Ok(entries) = std::fs::read_dir(dir) else {
202        return false;
203    };
204    let count = entries
205        .filter_map(Result::ok)
206        .filter(|e| e.file_type().is_ok_and(|ft| ft.is_dir()))
207        .filter(|e| has_project_marker(&e.path()))
208        .take(2)
209        .count();
210    count >= 2
211}
212
213/// Returns `true` if `project_root` collides with the lean-ctx data directory.
214/// This prevents project-scoped files (overlays.json, policies.json) from being
215/// written into `~/.lean-ctx/` or `~/.config/lean-ctx/`.
216pub fn is_data_dir_collision(project_root: &Path) -> bool {
217    if is_broad_or_unsafe_root(project_root) {
218        return true;
219    }
220    if let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() {
221        let project_lean_ctx = project_root.join(".lean-ctx");
222        if project_lean_ctx == data_dir || data_dir.starts_with(&project_lean_ctx) {
223            return true;
224        }
225    }
226    false
227}
228
229/// Returns the project-scoped `.lean-ctx/` directory if the project root is safe.
230/// Returns `Err` if the project root collides with the global data directory.
231pub fn safe_project_data_dir(project_root: &Path) -> Result<PathBuf, String> {
232    if is_data_dir_collision(project_root) {
233        return Err(format!(
234            "project root {} collides with global data directory; \
235             skipping project-scoped write",
236            project_root.display()
237        ));
238    }
239    Ok(project_root.join(".lean-ctx"))
240}
241
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_regular_verbatim() {
        let stripped = strip_verbatim(PathBuf::from(r"\\?\C:\Users\dev\project"));
        assert_eq!(stripped, PathBuf::from("C:/Users/dev/project"));
    }

    #[test]
    fn tcc_sensitive_home_dir_matches_home_and_magic_dirs() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        // Home itself and the macOS magic dirs are off-limits (#356).
        assert!(is_tcc_sensitive_home_dir(&home));
        for magic in ["Documents", "Desktop", "Downloads"] {
            assert!(is_tcc_sensitive_home_dir(&home.join(magic)));
        }
    }

    #[test]
    fn tcc_sensitive_home_dir_allows_real_projects() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        // A real project (even nested under Documents) and non-magic home children
        // are scannable — only the bare magic dirs / home are blocked.
        let nested_project = home.join("Documents").join("my-project");
        assert!(!is_tcc_sensitive_home_dir(&nested_project));
        for ordinary in ["code", "Projects"] {
            assert!(!is_tcc_sensitive_home_dir(&home.join(ordinary)));
        }
    }

    #[test]
    fn strip_unc_verbatim() {
        let stripped = strip_verbatim(PathBuf::from(r"\\?\UNC\server\share\dir"));
        assert_eq!(stripped, PathBuf::from("//server/share/dir"));
    }

    #[test]
    fn no_prefix_unchanged() {
        let original = PathBuf::from("/home/user/project");
        assert_eq!(strip_verbatim(original.clone()), original);
    }

    #[test]
    fn windows_drive_unchanged() {
        let original = PathBuf::from("C:/Users/dev");
        assert_eq!(strip_verbatim(original.clone()), original);
    }

    #[test]
    fn strip_str_regular() {
        let cleaned = strip_verbatim_str(r"\\?\E:\code\lean-ctx");
        assert_eq!(cleaned, Some("E:/code/lean-ctx".to_string()));
    }

    #[test]
    fn strip_str_unc() {
        let cleaned = strip_verbatim_str(r"\\?\UNC\myserver\data");
        assert_eq!(cleaned, Some("//myserver/data".to_string()));
    }

    #[test]
    fn strip_str_forward_slash_variant() {
        let cleaned = strip_verbatim_str("//?/C:/Users/dev");
        assert_eq!(cleaned, Some("C:/Users/dev".to_string()));
    }

    #[test]
    fn strip_str_no_prefix() {
        assert_eq!(strip_verbatim_str("/home/user"), None);
    }

    #[test]
    fn safe_canonicalize_or_self_nonexistent() {
        let missing = Path::new("/this/path/should/not/exist/xyzzy");
        assert_eq!(safe_canonicalize_or_self(missing), missing.to_path_buf());
    }

    #[test]
    fn normalize_msys_path_to_native() {
        let normalized = normalize_tool_path("/c/Users/ABC/AppData/lean-ctx");
        assert_eq!(normalized, "C:/Users/ABC/AppData/lean-ctx");
    }

    #[test]
    fn normalize_msys_uppercase_drive() {
        let normalized = normalize_tool_path("/D/Program Files/lean-ctx.exe");
        assert_eq!(normalized, "D:/Program Files/lean-ctx.exe");
    }

    #[test]
    fn normalize_native_windows_path_unchanged() {
        let normalized = normalize_tool_path("C:/Users/ABC/lean-ctx.exe");
        assert_eq!(normalized, "C:/Users/ABC/lean-ctx.exe");
    }

    #[test]
    fn normalize_backslash_windows_path() {
        let normalized = normalize_tool_path(r"C:\Users\ABC\lean-ctx.exe");
        assert_eq!(normalized, "C:/Users/ABC/lean-ctx.exe");
    }

    #[test]
    fn normalize_unix_path_unchanged() {
        let normalized = normalize_tool_path("/usr/local/bin/lean-ctx");
        assert_eq!(normalized, "/usr/local/bin/lean-ctx");
    }

    #[test]
    fn normalize_windows_path_with_spaces_and_backslashes() {
        // The exact "paths with spaces" scenario reported on Windows (#324):
        // backslashes are converted to forward slashes (so client render layers
        // never escape-mangle them) while spaces in directory names survive.
        let with_spaces = normalize_tool_path(r"C:\Users\My Name\My Project\src\main.rs");
        assert_eq!(with_spaces, "C:/Users/My Name/My Project/src/main.rs");

        let program_files = normalize_tool_path(r"C:\Program Files\app\config.toml");
        assert_eq!(program_files, "C:/Program Files/app/config.toml");
    }

    #[test]
    fn normalize_double_slashes() {
        let normalized = normalize_tool_path("C:/Users//ABC//lean-ctx");
        assert_eq!(normalized, "C:/Users/ABC/lean-ctx");
    }

    #[test]
    fn normalize_trailing_slash_removed() {
        let normalized = normalize_tool_path("/c/Users/ABC/");
        assert_eq!(normalized, "C:/Users/ABC");
    }

    #[test]
    fn normalize_root_slash_preserved() {
        let normalized = normalize_tool_path("/");
        assert_eq!(normalized, "/");
    }

    #[test]
    fn normalize_drive_root_preserved() {
        let normalized = normalize_tool_path("C:/");
        assert_eq!(normalized, "C:/");
    }

    #[test]
    fn normalize_verbatim_with_msys() {
        let normalized = normalize_tool_path(r"\\?\C:\Users\dev");
        assert_eq!(normalized, "C:/Users/dev");
    }

    #[test]
    fn broad_root_rejects_home() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        assert!(is_broad_or_unsafe_root(&home));
    }

    #[test]
    fn broad_root_rejects_filesystem_root() {
        let root = Path::new("/");
        assert!(is_broad_or_unsafe_root(root));
    }

    #[test]
    fn broad_root_rejects_dot() {
        let cwd = Path::new(".");
        assert!(is_broad_or_unsafe_root(cwd));
    }

    #[test]
    fn broad_root_rejects_agent_dirs() {
        for agent_dir in ["/home/user/.claude", "/home/user/.codex"] {
            assert!(is_broad_or_unsafe_root(Path::new(agent_dir)));
        }
    }

    #[test]
    fn broad_root_allows_project_subdir() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("my-project");
        std::fs::create_dir_all(&project).unwrap();
        assert!(!is_broad_or_unsafe_root(&project));
    }

    #[test]
    fn broad_root_allows_home_subdirs() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let nested = home.join("projects").join("my-app");
        assert!(!is_broad_or_unsafe_root(&nested));
    }

    #[test]
    fn data_dir_collision_rejects_home() {
        let Some(home) = dirs::home_dir() else {
            return;
        };
        assert!(is_data_dir_collision(&home));
    }

    #[test]
    fn data_dir_collision_allows_normal_project() {
        let tmp = tempfile::tempdir().unwrap();
        let project_root = tmp.path().join("my-project");
        std::fs::create_dir_all(&project_root).unwrap();
        assert!(!is_data_dir_collision(&project_root));
    }

    #[test]
    fn has_project_marker_detects_git() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = tmp.path().join("repo");
        std::fs::create_dir_all(&repo).unwrap();

        // No marker yet.
        assert!(!has_project_marker(&repo));

        // A `.git` directory turns it into a project root.
        std::fs::create_dir(repo.join(".git")).unwrap();
        assert!(has_project_marker(&repo));
    }

    #[test]
    fn has_project_marker_detects_cargo_toml() {
        let tmp = tempfile::tempdir().unwrap();
        let crate_dir = tmp.path().join("rust-project");
        std::fs::create_dir_all(&crate_dir).unwrap();
        std::fs::write(crate_dir.join("Cargo.toml"), "[package]").unwrap();
        assert!(has_project_marker(&crate_dir));
    }

    #[test]
    fn multi_repo_children_needs_two() {
        let tmp = tempfile::tempdir().unwrap();
        let workspace = tmp.path().join("code");
        std::fs::create_dir_all(&workspace).unwrap();

        // 0 repos → false
        assert!(!has_multi_repo_children(&workspace));

        // 1 repo → false
        std::fs::create_dir_all(workspace.join("repo1").join(".git")).unwrap();
        assert!(!has_multi_repo_children(&workspace));

        // 2 repos → true
        std::fs::create_dir_all(workspace.join("repo2").join(".git")).unwrap();
        assert!(has_multi_repo_children(&workspace));
    }

    #[test]
    fn multi_repo_children_ignores_files() {
        let tmp = tempfile::tempdir().unwrap();
        let workspace = tmp.path().join("mixed");
        std::fs::create_dir_all(&workspace).unwrap();

        // One repo dir + one plain file (not a directory) next to it
        std::fs::create_dir_all(workspace.join("repo1").join(".git")).unwrap();
        std::fs::write(workspace.join("not-a-repo"), "file").unwrap();
        assert!(!has_multi_repo_children(&workspace));

        // Add second actual repo
        let second_repo = workspace.join("repo2");
        std::fs::create_dir_all(&second_repo).unwrap();
        std::fs::write(second_repo.join("package.json"), "{}").unwrap();
        assert!(has_multi_repo_children(&workspace));
    }

    #[test]
    fn multi_repo_children_nonexistent_dir() {
        let missing = Path::new("/nonexistent/path/xyz");
        assert!(!has_multi_repo_children(missing));
    }
}