Skip to main content

lean_ctx/core/
pathutil.rs

1use std::path::{Path, PathBuf};
2
3/// Canonicalize a path and strip the Windows verbatim/extended-length prefix (`\\?\`)
4/// that `std::fs::canonicalize` adds on Windows. This prefix breaks many tools and
5/// string-based path comparisons.
6///
7/// On non-Windows platforms this is equivalent to `std::fs::canonicalize`.
8pub fn safe_canonicalize(path: &Path) -> std::io::Result<PathBuf> {
9    let canon = std::fs::canonicalize(path)?;
10    Ok(strip_verbatim(canon))
11}
12
13/// Like `safe_canonicalize` but returns the original path on failure.
14pub fn safe_canonicalize_or_self(path: &Path) -> PathBuf {
15    safe_canonicalize(path).unwrap_or_else(|_| path.to_path_buf())
16}
17
18/// Canonicalize with a timeout guard. Protects against hangs on WSL2 DrvFS,
19/// Windows reparse points, NFS, FUSE, sshfs, and other slow filesystems.
20/// Falls back to the original path if canonicalize doesn't complete within the timeout.
21/// Self-healing: after a timeout, subsequent calls to slow mounts skip the thread entirely.
22pub fn safe_canonicalize_bounded(path: &Path, timeout_ms: u64) -> PathBuf {
23    use super::io_health;
24
25    let path_str = path.to_string_lossy();
26    if io_health::is_slow_mount(&path_str) && io_health::recent_freeze_count() > 0 {
27        return safe_canonicalize_or_self(path);
28    }
29
30    let effective_timeout =
31        io_health::adaptive_timeout(std::time::Duration::from_millis(timeout_ms));
32
33    let path_owned = path.to_path_buf();
34    let (tx, rx) = std::sync::mpsc::channel();
35    let _ = std::thread::Builder::new()
36        .name("canonicalize-bounded".into())
37        .spawn(move || {
38            let result = safe_canonicalize(&path_owned).unwrap_or(path_owned);
39            let _ = tx.send(result);
40        });
41    if let Ok(canonical) = rx.recv_timeout(effective_timeout) {
42        canonical
43    } else {
44        io_health::record_freeze();
45        tracing::warn!(
46            "[SECURITY] canonicalize timed out ({}ms) for {}; PathJail checks on \
47             uncanonicalized paths may be less reliable",
48            effective_timeout.as_millis(),
49            path.display()
50        );
51        path.to_path_buf()
52    }
53}
54
55/// Remove the `\\?\` / `//?/` verbatim prefix from a `PathBuf`.
56/// Handles both regular verbatim (`\\?\C:\...`) and UNC verbatim (`\\?\UNC\...`).
57pub fn strip_verbatim(path: PathBuf) -> PathBuf {
58    let s = path.to_string_lossy();
59    if let Some(stripped) = strip_verbatim_str(&s) {
60        PathBuf::from(stripped)
61    } else {
62        path
63    }
64}
65
/// String-level removal of the `\\?\` / `//?/` verbatim prefix.
///
/// Backslashes are first converted to forward slashes, so the returned string
/// always uses `/`. A `UNC/` marker directly after the prefix is turned back
/// into the ordinary `//server/share` form.
///
/// Returns `Some(cleaned)` if a prefix was found, `None` otherwise.
pub fn strip_verbatim_str(path: &str) -> Option<String> {
    let forward = path.replace('\\', "/");
    let tail = forward.strip_prefix("//?/")?;

    let cleaned = if let Some(share) = tail.strip_prefix("UNC/") {
        format!("//{share}")
    } else {
        tail.to_owned()
    };
    Some(cleaned)
}
79
80/// Normalize paths from any client format to a consistent OS-native form.
81/// Handles MSYS2/Git Bash (`/c/Users/...` -> `C:/Users/...`), mixed separators,
82/// double slashes, and trailing slashes. Uses forward slashes for consistency.
83pub fn normalize_tool_path(path: &str) -> String {
84    let mut p = match strip_verbatim_str(path) {
85        Some(stripped) => stripped,
86        None => path.to_string(),
87    };
88
89    // MSYS2/Git Bash: /c/Users/... -> C:/Users/...
90    if p.len() >= 3
91        && p.starts_with('/')
92        && p.as_bytes()[1].is_ascii_alphabetic()
93        && p.as_bytes()[2] == b'/'
94    {
95        let drive = p.as_bytes()[1].to_ascii_uppercase() as char;
96        p = format!("{drive}:{}", &p[2..]);
97    }
98
99    p = p.replace('\\', "/");
100
101    // Collapse double slashes (preserve UNC paths starting with //)
102    while p.contains("//") && !p.starts_with("//") {
103        p = p.replace("//", "/");
104    }
105
106    // Remove trailing slash (unless root like "/" or "C:/")
107    if p.len() > 1 && p.ends_with('/') && !p.ends_with(":/") {
108        p.pop();
109    }
110
111    // Resolve symlinks for absolute paths to ensure cache key consistency.
112    // Skip relative paths (preserve "." / "../" as-is), root-only paths (/ or C:/),
113    // and slow mounts (WSL DrvFS /mnt/) where canonicalize can hang.
114    // Uses safe_canonicalize to strip Windows \\?\ prefix.
115    let is_absolute = p.starts_with('/') || (p.len() >= 3 && p.as_bytes()[1] == b':');
116    let is_root_only = p == "/" || (p.len() <= 3 && p.ends_with('/') && is_absolute);
117    if is_absolute && !is_root_only && !crate::core::io_health::is_slow_mount(&p) {
118        if let Ok(canonical) = safe_canonicalize(Path::new(&*p)) {
119            let canonical_str = canonical.to_string_lossy().replace('\\', "/");
120            if !canonical_str.is_empty() {
121                p = canonical_str;
122            }
123        }
124    }
125
126    p
127}
128
129/// Returns `true` if the directory is too broad to be a valid project root.
130/// Rejects home directory, filesystem root, `.` (bare CWD), and agent sandbox
131/// directories (`.claude`, `.codex`). Used to prevent writing project-scoped
132/// data (overlays, policies) into the global `~/.lean-ctx/` data directory.
133pub fn is_broad_or_unsafe_root(dir: &Path) -> bool {
134    if let Some(home) = dirs::home_dir() {
135        if dir == home {
136            return true;
137        }
138    }
139    let s = dir.to_string_lossy();
140    if s == "/" || s == "\\" || s == "." {
141        return true;
142    }
143    s.ends_with("/.claude")
144        || s.ends_with("/.codex")
145        || s.contains("/.claude/")
146        || s.contains("/.codex/")
147}
148
/// Well-known project markers used to identify project roots.
///
/// Each entry is a file or directory name looked up directly inside a
/// candidate directory (see `has_project_marker`); finding any one of them is
/// enough.
pub const PROJECT_MARKERS: &[&str] = &[
    // Version control.
    ".git",
    // Rust (Cargo).
    "Cargo.toml",
    // Node.js / npm.
    "package.json",
    // Go modules.
    "go.mod",
    // Python packaging.
    "pyproject.toml",
    "setup.py",
    // JVM builds: Maven and Gradle.
    "pom.xml",
    "build.gradle",
    // Make-based builds.
    "Makefile",
    // NOTE(review): presumably lean-ctx's own per-project config file — confirm.
    ".lean-ctx.toml",
];
162
163/// Returns `true` if `dir` contains at least one known project marker.
164pub fn has_project_marker(dir: &Path) -> bool {
165    PROJECT_MARKERS.iter().any(|m| dir.join(m).exists())
166}
167
168/// Returns `true` if `dir` is a multi-repo workspace parent — i.e. it has at
169/// least 2 immediate child directories that each contain a project marker.
170pub fn has_multi_repo_children(dir: &Path) -> bool {
171    let Ok(entries) = std::fs::read_dir(dir) else {
172        return false;
173    };
174    let count = entries
175        .filter_map(Result::ok)
176        .filter(|e| e.file_type().is_ok_and(|ft| ft.is_dir()))
177        .filter(|e| has_project_marker(&e.path()))
178        .take(2)
179        .count();
180    count >= 2
181}
182
183/// Returns `true` if `project_root` collides with the lean-ctx data directory.
184/// This prevents project-scoped files (overlays.json, policies.json) from being
185/// written into `~/.lean-ctx/` or `~/.config/lean-ctx/`.
186pub fn is_data_dir_collision(project_root: &Path) -> bool {
187    if is_broad_or_unsafe_root(project_root) {
188        return true;
189    }
190    if let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() {
191        let project_lean_ctx = project_root.join(".lean-ctx");
192        if project_lean_ctx == data_dir || data_dir.starts_with(&project_lean_ctx) {
193            return true;
194        }
195    }
196    false
197}
198
199/// Returns the project-scoped `.lean-ctx/` directory if the project root is safe.
200/// Returns `Err` if the project root collides with the global data directory.
201pub fn safe_project_data_dir(project_root: &Path) -> Result<PathBuf, String> {
202    if is_data_dir_collision(project_root) {
203        return Err(format!(
204            "project root {} collides with global data directory; \
205             skipping project-scoped write",
206            project_root.display()
207        ));
208    }
209    Ok(project_root.join(".lean-ctx"))
210}
211
// Unit tests for the path helpers above. The filesystem-backed tests create
// their fixtures in temporary directories via the `tempfile` crate.
#[cfg(test)]
mod tests {
    use super::*;

    // --- strip_verbatim / strip_verbatim_str ---

    #[test]
    fn strip_regular_verbatim() {
        let p = PathBuf::from(r"\\?\C:\Users\dev\project");
        let result = strip_verbatim(p);
        assert_eq!(result, PathBuf::from("C:/Users/dev/project"));
    }

    #[test]
    fn strip_unc_verbatim() {
        let p = PathBuf::from(r"\\?\UNC\server\share\dir");
        let result = strip_verbatim(p);
        assert_eq!(result, PathBuf::from("//server/share/dir"));
    }

    #[test]
    fn no_prefix_unchanged() {
        let p = PathBuf::from("/home/user/project");
        let result = strip_verbatim(p.clone());
        assert_eq!(result, p);
    }

    #[test]
    fn windows_drive_unchanged() {
        let p = PathBuf::from("C:/Users/dev");
        let result = strip_verbatim(p.clone());
        assert_eq!(result, p);
    }

    #[test]
    fn strip_str_regular() {
        assert_eq!(
            strip_verbatim_str(r"\\?\E:\code\lean-ctx"),
            Some("E:/code/lean-ctx".to_string())
        );
    }

    #[test]
    fn strip_str_unc() {
        assert_eq!(
            strip_verbatim_str(r"\\?\UNC\myserver\data"),
            Some("//myserver/data".to_string())
        );
    }

    #[test]
    fn strip_str_forward_slash_variant() {
        assert_eq!(
            strip_verbatim_str("//?/C:/Users/dev"),
            Some("C:/Users/dev".to_string())
        );
    }

    #[test]
    fn strip_str_no_prefix() {
        assert_eq!(strip_verbatim_str("/home/user"), None);
    }

    // --- safe_canonicalize_or_self ---

    #[test]
    fn safe_canonicalize_or_self_nonexistent() {
        // Canonicalizing a missing path fails, so the input must come back as-is.
        let p = Path::new("/this/path/should/not/exist/xyzzy");
        let result = safe_canonicalize_or_self(p);
        assert_eq!(result, p.to_path_buf());
    }

    // --- normalize_tool_path ---
    // These inputs are expected not to resolve to a different canonical path
    // on the test machine, so the purely textual normalization is what gets
    // asserted.

    #[test]
    fn normalize_msys_path_to_native() {
        assert_eq!(
            normalize_tool_path("/c/Users/ABC/AppData/lean-ctx"),
            "C:/Users/ABC/AppData/lean-ctx"
        );
    }

    #[test]
    fn normalize_msys_uppercase_drive() {
        assert_eq!(
            normalize_tool_path("/D/Program Files/lean-ctx.exe"),
            "D:/Program Files/lean-ctx.exe"
        );
    }

    #[test]
    fn normalize_native_windows_path_unchanged() {
        assert_eq!(
            normalize_tool_path("C:/Users/ABC/lean-ctx.exe"),
            "C:/Users/ABC/lean-ctx.exe"
        );
    }

    #[test]
    fn normalize_backslash_windows_path() {
        assert_eq!(
            normalize_tool_path(r"C:\Users\ABC\lean-ctx.exe"),
            "C:/Users/ABC/lean-ctx.exe"
        );
    }

    #[test]
    fn normalize_unix_path_unchanged() {
        assert_eq!(
            normalize_tool_path("/usr/local/bin/lean-ctx"),
            "/usr/local/bin/lean-ctx"
        );
    }

    #[test]
    fn normalize_windows_path_with_spaces_and_backslashes() {
        // The exact "paths with spaces" scenario reported on Windows (#324):
        // backslashes are converted to forward slashes (so client render layers
        // never escape-mangle them) while spaces in directory names survive.
        assert_eq!(
            normalize_tool_path(r"C:\Users\My Name\My Project\src\main.rs"),
            "C:/Users/My Name/My Project/src/main.rs"
        );
        assert_eq!(
            normalize_tool_path(r"C:\Program Files\app\config.toml"),
            "C:/Program Files/app/config.toml"
        );
    }

    #[test]
    fn normalize_double_slashes() {
        assert_eq!(
            normalize_tool_path("C:/Users//ABC//lean-ctx"),
            "C:/Users/ABC/lean-ctx"
        );
    }

    #[test]
    fn normalize_trailing_slash_removed() {
        assert_eq!(normalize_tool_path("/c/Users/ABC/"), "C:/Users/ABC");
    }

    #[test]
    fn normalize_root_slash_preserved() {
        assert_eq!(normalize_tool_path("/"), "/");
    }

    #[test]
    fn normalize_drive_root_preserved() {
        assert_eq!(normalize_tool_path("C:/"), "C:/");
    }

    #[test]
    fn normalize_verbatim_with_msys() {
        // Despite the name, the input is a verbatim-prefixed drive path; this
        // checks that the `\\?\` prefix is stripped and separators normalized.
        assert_eq!(normalize_tool_path(r"\\?\C:\Users\dev"), "C:/Users/dev");
    }

    // --- is_broad_or_unsafe_root ---

    #[test]
    fn broad_root_rejects_home() {
        // Skipped silently when no home directory can be determined.
        if let Some(home) = dirs::home_dir() {
            assert!(is_broad_or_unsafe_root(&home));
        }
    }

    #[test]
    fn broad_root_rejects_filesystem_root() {
        assert!(is_broad_or_unsafe_root(Path::new("/")));
    }

    #[test]
    fn broad_root_rejects_dot() {
        assert!(is_broad_or_unsafe_root(Path::new(".")));
    }

    #[test]
    fn broad_root_rejects_agent_dirs() {
        assert!(is_broad_or_unsafe_root(Path::new("/home/user/.claude")));
        assert!(is_broad_or_unsafe_root(Path::new("/home/user/.codex")));
    }

    #[test]
    fn broad_root_allows_project_subdir() {
        let tmp = tempfile::tempdir().unwrap();
        let subdir = tmp.path().join("my-project");
        std::fs::create_dir_all(&subdir).unwrap();
        assert!(!is_broad_or_unsafe_root(&subdir));
    }

    #[test]
    fn broad_root_allows_home_subdirs() {
        // The subdirectory does not need to exist; the check is path-based.
        if let Some(home) = dirs::home_dir() {
            let subdir = home.join("projects").join("my-app");
            assert!(!is_broad_or_unsafe_root(&subdir));
        }
    }

    // --- is_data_dir_collision ---

    #[test]
    fn data_dir_collision_rejects_home() {
        if let Some(home) = dirs::home_dir() {
            assert!(is_data_dir_collision(&home));
        }
    }

    #[test]
    fn data_dir_collision_allows_normal_project() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("my-project");
        std::fs::create_dir_all(&project).unwrap();
        assert!(!is_data_dir_collision(&project));
    }

    // --- has_project_marker ---

    #[test]
    fn has_project_marker_detects_git() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().join("repo");
        std::fs::create_dir_all(&root).unwrap();
        // Empty directory: no marker yet.
        assert!(!has_project_marker(&root));
        std::fs::create_dir(root.join(".git")).unwrap();
        assert!(has_project_marker(&root));
    }

    #[test]
    fn has_project_marker_detects_cargo_toml() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().join("rust-project");
        std::fs::create_dir_all(&root).unwrap();
        std::fs::write(root.join("Cargo.toml"), "[package]").unwrap();
        assert!(has_project_marker(&root));
    }

    // --- has_multi_repo_children ---

    #[test]
    fn multi_repo_children_needs_two() {
        let tmp = tempfile::tempdir().unwrap();
        let parent = tmp.path().join("code");
        std::fs::create_dir_all(&parent).unwrap();

        // 0 repos → false
        assert!(!has_multi_repo_children(&parent));

        // 1 repo → false
        let repo1 = parent.join("repo1");
        std::fs::create_dir_all(repo1.join(".git")).unwrap();
        assert!(!has_multi_repo_children(&parent));

        // 2 repos → true
        let repo2 = parent.join("repo2");
        std::fs::create_dir_all(repo2.join(".git")).unwrap();
        assert!(has_multi_repo_children(&parent));
    }

    #[test]
    fn multi_repo_children_ignores_files() {
        let tmp = tempfile::tempdir().unwrap();
        let parent = tmp.path().join("mixed");
        std::fs::create_dir_all(&parent).unwrap();

        // One repo dir + one plain file named "not-a-repo". The file is not a
        // directory, so it must not be counted as a second repo.
        let repo1 = parent.join("repo1");
        std::fs::create_dir_all(repo1.join(".git")).unwrap();
        std::fs::write(parent.join("not-a-repo"), "file").unwrap();
        assert!(!has_multi_repo_children(&parent));

        // Add second actual repo (identified by package.json rather than .git)
        let repo2 = parent.join("repo2");
        std::fs::create_dir_all(&repo2).unwrap();
        std::fs::write(repo2.join("package.json"), "{}").unwrap();
        assert!(has_multi_repo_children(&parent));
    }

    #[test]
    fn multi_repo_children_nonexistent_dir() {
        // An unreadable/missing directory is reported as "not a workspace".
        assert!(!has_multi_repo_children(Path::new("/nonexistent/path/xyz")));
    }
}