Skip to main content

sqry_core/project/
path_utils.rs

1//! Path canonicalization and resolution utilities for Project root handling
2//!
3//! Implements the path handling strategy from `PROJECT_ROOT_SPEC.md` and `02_DESIGN.md` \[H1\].
4//! All file paths are canonicalized before root resolution to ensure the invariant
5//! "at most one Project per `index_root`" holds regardless of how paths are accessed.
6
7use std::io;
8use std::path::{Component, Path, PathBuf};
9
10/// Canonicalize a path, resolving symlinks where possible.
11///
12/// This function attempts full canonicalization (resolving symlinks, `.`, `..`).
13/// On failure (path doesn't exist, permission denied, circular symlinks), it
14/// falls back to [`absolutize_without_resolution`] which normalizes without
15/// touching the filesystem.
16///
17/// # Platform Behavior
18///
19/// - **Linux**: Resolves symbolic links via `realpath(3)`
20/// - **macOS**: Resolves symbolic links; macOS aliases are NOT resolved
21/// - **Windows**: Resolves junction points, symbolic links, and NTFS reparse points
22///
23/// # Edge Cases (per `02_DESIGN.md` H1)
24///
25/// - **Broken symlinks**: Uses absolutized path; logs warning
26/// - **Circular symlinks**: Canonicalization fails; uses absolutized path; logs error
27/// - **Permission denied**: Uses absolutized path; logs warning with context
28///
29/// # Errors
30///
31/// Returns an error only if both canonicalization AND absolutize fail,
32/// which should only happen if `current_dir()` fails (extremely rare).
33///
34/// # Examples
35///
36/// ```
37/// use sqry_core::project::path_utils::canonicalize_path;
38/// use std::path::Path;
39///
40/// // Existing path - fully canonicalized
41/// let result = canonicalize_path(Path::new("/tmp"));
42/// assert!(result.is_ok());
43///
44/// // Non-existent path - absolutized without resolution
45/// let result = canonicalize_path(Path::new("/nonexistent/path/file.rs"));
46/// assert!(result.is_ok()); // Falls back to absolutize
47/// ```
48pub fn canonicalize_path(path: &Path) -> Result<PathBuf, io::Error> {
49    match std::fs::canonicalize(path) {
50        Ok(canonical) => Ok(canonical),
51        Err(e) => {
52            // Log the fallback - caller should handle appropriately
53            log::debug!(
54                "Canonicalization failed for '{}': {}. Using absolutize fallback.",
55                path.display(),
56                e
57            );
58            absolutize_without_resolution(path)
59        }
60    }
61}
62
63/// Absolutize a path without touching the filesystem.
64///
65/// This function provides a deterministic fallback when canonicalization fails.
66/// It:
67/// 1. Joins relative paths with the current working directory
68/// 2. Normalizes `.` and `..` components (purely lexically)
69///
70/// # Determinism Guarantee (per `02_DESIGN.md` C4)
71///
72/// This function is deterministic: two accesses to the same logical directory
73/// (even via different relative paths) produce the same result when called from
74/// the same working directory. This prevents duplicate Project creation.
75///
76/// # Errors
77///
78/// Returns an error if `std::env::current_dir()` fails (extremely rare).
79///
80/// # Examples
81///
82/// ```
83/// use sqry_core::project::path_utils::absolutize_without_resolution;
84/// use std::path::Path;
85///
86/// // Relative paths are joined with CWD and normalized
87/// let result1 = absolutize_without_resolution(Path::new("./foo/../bar"));
88/// let result2 = absolutize_without_resolution(Path::new("bar"));
89/// // Both resolve to same path (when called from same CWD)
90/// ```
91pub fn absolutize_without_resolution(path: &Path) -> Result<PathBuf, io::Error> {
92    // Get current working directory
93    let cwd = std::env::current_dir()?;
94
95    // Join with path if relative
96    let absolute = if path.is_absolute() {
97        path.to_path_buf()
98    } else {
99        cwd.join(path)
100    };
101
102    // Normalize . and .. components (without touching filesystem)
103    let normalized = normalize_path_components(&absolute);
104
105    Ok(normalized)
106}
107
/// Collapse `.` and `..` components of a path using only its text.
///
/// The path is walked component by component while a stack of retained
/// components is maintained:
/// - `.` is discarded
/// - `..` removes the preceding component when that component is an ordinary name
/// - `..` immediately after a root or prefix is discarded (there is nothing above it)
/// - `..` with nothing, or only other `..` components, before it is kept, so
///   relative paths that climb above their start stay meaningful
/// - every other component (prefix, root, ordinary name) is kept
///
/// If nothing is left, `"."` is returned instead of an empty path.
///
/// No filesystem access takes place, so symlinks are not taken into account.
///
/// # Platform Notes
///
/// - On Unix: Preserves leading `/`
/// - On Windows: Preserves drive prefix (`C:\`) and UNC paths
///
/// # Examples
///
/// ```
/// use sqry_core::project::path_utils::normalize_path_components;
/// use std::path::Path;
///
/// let path = Path::new("/home/user/../user/./project");
/// let normalized = normalize_path_components(path);
/// assert_eq!(normalized, Path::new("/home/user/project"));
/// ```
#[must_use]
pub fn normalize_path_components(path: &Path) -> PathBuf {
    let mut stack: Vec<Component<'_>> = Vec::new();

    for part in path.components() {
        match part {
            // `.` never contributes to the result.
            Component::CurDir => {}
            // What `..` does depends on the component retained just before it.
            Component::ParentDir => match stack.last().copied() {
                // An ordinary name is cancelled out by the `..`.
                Some(Component::Normal(_)) => {
                    stack.pop();
                }
                // Nothing left to cancel in a relative path: the `..` must stay.
                None | Some(Component::ParentDir) => stack.push(part),
                // Directly below a root or prefix: drop the `..`.
                Some(Component::RootDir)
                | Some(Component::Prefix(_))
                | Some(Component::CurDir) => {}
            },
            // Prefix, root and ordinary names are retained unchanged.
            other => stack.push(other),
        }
    }

    if stack.is_empty() {
        // Never hand back an empty path.
        PathBuf::from(".")
    } else {
        stack.into_iter().collect()
    }
}
170
/// Default directory names to skip during repository detection.
///
/// These directories are commonly large dependency/build/cache directories
/// that rarely contain git repositories worth indexing.
///
/// Entries are bare directory names (not paths or glob patterns); the matching
/// done by [`is_ignored_dir_with_config`] is an exact, case-sensitive comparison
/// against each entry.
///
/// Note: `.git` is intentionally NOT in this list (we need to detect it).
///
/// Users can override this list via configuration (see Phase 5: Configuration Integration).
pub const DEFAULT_IGNORED_DIRS: &[&str] = &[
    // Dependency and build-output directories
    "node_modules",
    "target",
    "build",
    "dist",
    "vendor",
    // Tool caches
    ".cache",
    ".npm",
    ".cargo",
    // Python caches and virtual environments
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".tox",
    ".venv",
    "venv",
    // Build-tool and editor/IDE state
    ".gradle",
    ".idea",
    ".vs",
    ".vscode",
];
199
/// Check if a directory entry should be ignored during repository detection.
///
/// Per `02_DESIGN.md`, we skip common ignored directories to speed up walking.
/// This is a convenience wrapper that delegates to
/// [`is_ignored_dir_with_config`] with [`DEFAULT_IGNORED_DIRS`] as the ignore list.
///
/// Note: `.git` directories are NOT ignored (we need to detect them).
///
/// # Arguments
///
/// * `name` - The directory name to check (a single name, not a full path)
///
/// # Returns
///
/// `true` when `name` exactly matches an entry of [`DEFAULT_IGNORED_DIRS`];
/// `false` otherwise, including for names that are not valid UTF-8.
///
/// # See Also
///
/// Use [`is_ignored_dir_with_config`] for custom ignore lists.
#[must_use]
pub fn is_ignored_dir(name: &std::ffi::OsStr) -> bool {
    is_ignored_dir_with_config(name, DEFAULT_IGNORED_DIRS)
}
218
/// Decide whether a directory name appears in a caller-supplied ignore list.
///
/// This is the configurable counterpart of the default ignore check: the
/// caller provides the exact set of directory names that repository detection
/// should skip.
///
/// Matching is an exact, case-sensitive comparison of the whole name against
/// each entry. A name that is not valid UTF-8 can never equal a `&str` entry,
/// so it is reported as not ignored.
///
/// # Arguments
///
/// * `name` - The directory name to check
/// * `ignored_dirs` - List of directory names to ignore
///
/// # Examples
///
/// ```
/// use sqry_core::project::path_utils::{is_ignored_dir_with_config, DEFAULT_IGNORED_DIRS};
/// use std::ffi::OsStr;
///
/// // Using custom ignore list
/// let custom_ignores = &["my_deps", "cached_stuff"];
/// assert!(is_ignored_dir_with_config(OsStr::new("my_deps"), custom_ignores));
/// assert!(!is_ignored_dir_with_config(OsStr::new("node_modules"), custom_ignores));
///
/// // Using default list
/// assert!(is_ignored_dir_with_config(OsStr::new("node_modules"), DEFAULT_IGNORED_DIRS));
/// ```
#[must_use]
pub fn is_ignored_dir_with_config(name: &std::ffi::OsStr, ignored_dirs: &[&str]) -> bool {
    // `to_str` yields `None` for non-UTF-8 names, which maps straight to `false`.
    name.to_str().map_or(false, |candidate| {
        ignored_dirs.iter().any(|entry| *entry == candidate)
    })
}
252
// Unit tests for the path utilities in this module: lexical normalization,
// the absolutize fallback, canonicalization (including symlink handling on
// Unix) and the ignored-directory checks.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // `.` components are dropped wherever they appear.
    #[test]
    fn test_normalize_removes_current_dir() {
        let path = Path::new("/home/./user/./project");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("/home/user/project"));
    }

    // `..` cancels the ordinary component that precedes it.
    #[test]
    fn test_normalize_resolves_parent_dir() {
        let path = Path::new("/home/user/../other/project");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("/home/other/project"));
    }

    // `.` and `..` mixed in a single path are both handled.
    #[test]
    fn test_normalize_combined() {
        let path = Path::new("/home/user/../user/./project/./src/../lib");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("/home/user/project/lib"));
    }

    // A bare root stays a bare root.
    #[test]
    fn test_normalize_preserves_root() {
        let path = Path::new("/");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("/"));
    }

    // Normalization also works on relative paths and keeps them relative.
    #[test]
    fn test_normalize_relative_path() {
        let path = Path::new("foo/../bar");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("bar"));
    }

    #[test]
    fn test_normalize_relative_above_start() {
        // Can't go above start of relative path - preserve ..
        let path = Path::new("../foo");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("../foo"));
    }

    #[test]
    fn test_normalize_empty_result() {
        // Should return "." not empty path
        let path = Path::new("foo/..");
        let result = normalize_path_components(path);
        assert_eq!(result, Path::new("."));
    }

    #[test]
    fn test_absolutize_determinism() {
        // Per C4: same logical path via different relative paths should produce same result
        // This test must run from a consistent CWD
        // (both calls read the process working directory, so it must not
        // change between them).
        let result1 = absolutize_without_resolution(Path::new("./foo/../bar")).unwrap();
        let result2 = absolutize_without_resolution(Path::new("bar")).unwrap();
        assert_eq!(result1, result2);
    }

    // An absolute path with no `.`/`..` components comes back unchanged.
    // The input literal is selected per platform family.
    #[test]
    fn test_absolutize_absolute_path_unchanged() {
        #[cfg(unix)]
        let path = Path::new("/absolute/path");
        #[cfg(windows)]
        let path = Path::new("C:\\absolute\\path");
        let result = absolutize_without_resolution(path).unwrap();
        assert_eq!(result, path);
    }

    // Canonicalizing a directory that exists succeeds and yields an absolute
    // path. On non-Unix targets the body is compiled out, so the test passes
    // without checking anything.
    #[test]
    fn test_canonicalize_existing_path() {
        // /tmp should exist on Unix systems
        #[cfg(unix)]
        {
            let result = canonicalize_path(Path::new("/tmp"));
            assert!(result.is_ok());
            // Result should be absolute
            assert!(result.unwrap().is_absolute());
        }
    }

    // A path that does not exist cannot be canonicalized, so the lexical
    // fallback must supply the result instead of an error.
    #[test]
    fn test_canonicalize_nonexistent_path_uses_fallback() {
        let path = Path::new("/nonexistent/deeply/nested/path");
        let result = canonicalize_path(path);
        // Should succeed via fallback
        assert!(result.is_ok());
        let resolved = result.unwrap();
        // Should be absolute
        assert!(resolved.is_absolute());
        // Should preserve the path structure (normalized)
        assert!(resolved.to_string_lossy().contains("nonexistent"));
    }

    // Spot-checks the default ignore list through the convenience wrapper.
    #[test]
    fn test_is_ignored_dir() {
        use std::ffi::OsStr;

        assert!(is_ignored_dir(OsStr::new("node_modules")));
        assert!(is_ignored_dir(OsStr::new("target")));
        assert!(is_ignored_dir(OsStr::new("__pycache__")));

        // .git is NOT ignored (we need to detect it)
        assert!(!is_ignored_dir(OsStr::new(".git")));
        assert!(!is_ignored_dir(OsStr::new("src")));
        assert!(!is_ignored_dir(OsStr::new("lib")));
    }

    // A custom list fully replaces the defaults: only its own entries match.
    #[test]
    fn test_is_ignored_dir_with_config_custom_list() {
        use std::ffi::OsStr;

        // Custom ignore list
        let custom_ignores = &["my_deps", "cached_stuff", "third_party"];

        // Custom dirs should be ignored
        assert!(is_ignored_dir_with_config(
            OsStr::new("my_deps"),
            custom_ignores
        ));
        assert!(is_ignored_dir_with_config(
            OsStr::new("cached_stuff"),
            custom_ignores
        ));
        assert!(is_ignored_dir_with_config(
            OsStr::new("third_party"),
            custom_ignores
        ));

        // Default dirs NOT in custom list should NOT be ignored
        assert!(!is_ignored_dir_with_config(
            OsStr::new("node_modules"),
            custom_ignores
        ));
        assert!(!is_ignored_dir_with_config(
            OsStr::new("target"),
            custom_ignores
        ));

        // Normal dirs should NOT be ignored
        assert!(!is_ignored_dir_with_config(
            OsStr::new("src"),
            custom_ignores
        ));
        assert!(!is_ignored_dir_with_config(
            OsStr::new(".git"),
            custom_ignores
        ));
    }

    #[test]
    fn test_is_ignored_dir_with_config_empty_list() {
        use std::ffi::OsStr;

        // Empty ignore list = nothing ignored
        let empty: &[&str] = &[];

        assert!(!is_ignored_dir_with_config(
            OsStr::new("node_modules"),
            empty
        ));
        assert!(!is_ignored_dir_with_config(OsStr::new("target"), empty));
        assert!(!is_ignored_dir_with_config(OsStr::new("src"), empty));
    }

    // The wrapper and the configurable function agree when the configurable
    // one is given the default list (checked for one hit and one miss).
    #[test]
    fn test_is_ignored_dir_with_config_default_list() {
        use std::ffi::OsStr;

        // Using DEFAULT_IGNORED_DIRS should match is_ignored_dir()
        assert_eq!(
            is_ignored_dir(OsStr::new("node_modules")),
            is_ignored_dir_with_config(OsStr::new("node_modules"), DEFAULT_IGNORED_DIRS)
        );
        assert_eq!(
            is_ignored_dir(OsStr::new("src")),
            is_ignored_dir_with_config(OsStr::new("src"), DEFAULT_IGNORED_DIRS)
        );
    }

    // Guards the contents of the default list against accidental edits.
    #[test]
    fn test_default_ignored_dirs_contains_common_dirs() {
        // Verify the default list contains expected directories
        assert!(DEFAULT_IGNORED_DIRS.contains(&"node_modules"));
        assert!(DEFAULT_IGNORED_DIRS.contains(&"target"));
        assert!(DEFAULT_IGNORED_DIRS.contains(&"vendor"));
        assert!(DEFAULT_IGNORED_DIRS.contains(&"__pycache__"));
        assert!(DEFAULT_IGNORED_DIRS.contains(&".venv"));
        assert!(DEFAULT_IGNORED_DIRS.contains(&".idea"));

        // .git should NOT be in the list
        assert!(!DEFAULT_IGNORED_DIRS.contains(&".git"));
    }

    // A symlink and its target must canonicalize to the same path.
    // Both sides go through `canonicalize_path`, so the comparison holds even
    // if the temporary directory itself sits behind a symlink.
    #[cfg(unix)]
    #[test]
    fn test_canonicalize_symlink() {
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();
        let target = temp.path().join("target_dir");
        let link = temp.path().join("link");

        // Create target directory and symlink
        std::fs::create_dir(&target).unwrap();
        symlink(&target, &link).unwrap();

        // Canonicalize should resolve symlink
        let result = canonicalize_path(&link).unwrap();
        let expected = canonicalize_path(&target).unwrap();
        assert_eq!(result, expected);
    }

    // A dangling symlink cannot be canonicalized; the call must still succeed
    // through the fallback. Only absoluteness of the result is checked here.
    #[cfg(unix)]
    #[test]
    fn test_canonicalize_broken_symlink_uses_fallback() {
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let temp = TempDir::new().unwrap();
        let link = temp.path().join("broken_link");

        // Create symlink to nonexistent target
        symlink("/nonexistent/target", &link).unwrap();

        // Canonicalize should fall back to absolutize
        let result = canonicalize_path(&link);
        assert!(result.is_ok());
        // Result should be absolutized version of link path
        let resolved = result.unwrap();
        assert!(resolved.is_absolute());
    }
}