sqry_core/project/path_utils.rs
//! Path canonicalization and resolution utilities for Project root handling.
//!
//! Implements the path handling strategy from `PROJECT_ROOT_SPEC.md` and `02_DESIGN.md` \[H1\].
//! All file paths are canonicalized before root resolution so that the invariant
//! "at most one Project per `index_root`" holds regardless of how a path is spelled.
6
7use std::io;
8use std::path::{Component, Path, PathBuf};
9
10/// Canonicalize a path, resolving symlinks where possible.
11///
12/// This function attempts full canonicalization (resolving symlinks, `.`, `..`).
13/// On failure (path doesn't exist, permission denied, circular symlinks), it
14/// falls back to [`absolutize_without_resolution`] which normalizes without
15/// touching the filesystem.
16///
17/// # Platform Behavior
18///
19/// - **Linux**: Resolves symbolic links via `realpath(3)`
20/// - **macOS**: Resolves symbolic links; macOS aliases are NOT resolved
21/// - **Windows**: Resolves junction points, symbolic links, and NTFS reparse points
22///
23/// # Edge Cases (per `02_DESIGN.md` H1)
24///
25/// - **Broken symlinks**: Uses absolutized path; logs warning
26/// - **Circular symlinks**: Canonicalization fails; uses absolutized path; logs error
27/// - **Permission denied**: Uses absolutized path; logs warning with context
28///
29/// # Errors
30///
31/// Returns an error only if both canonicalization AND absolutize fail,
32/// which should only happen if `current_dir()` fails (extremely rare).
33///
34/// # Examples
35///
36/// ```
37/// use sqry_core::project::path_utils::canonicalize_path;
38/// use std::path::Path;
39///
40/// // Existing path - fully canonicalized
41/// let result = canonicalize_path(Path::new("/tmp"));
42/// assert!(result.is_ok());
43///
44/// // Non-existent path - absolutized without resolution
45/// let result = canonicalize_path(Path::new("/nonexistent/path/file.rs"));
46/// assert!(result.is_ok()); // Falls back to absolutize
47/// ```
48pub fn canonicalize_path(path: &Path) -> Result<PathBuf, io::Error> {
49 match std::fs::canonicalize(path) {
50 Ok(canonical) => Ok(canonical),
51 Err(e) => {
52 // Log the fallback - caller should handle appropriately
53 log::debug!(
54 "Canonicalization failed for '{}': {}. Using absolutize fallback.",
55 path.display(),
56 e
57 );
58 absolutize_without_resolution(path)
59 }
60 }
61}
62
63/// Absolutize a path without touching the filesystem.
64///
65/// This function provides a deterministic fallback when canonicalization fails.
66/// It:
67/// 1. Joins relative paths with the current working directory
68/// 2. Normalizes `.` and `..` components (purely lexically)
69///
70/// # Determinism Guarantee (per `02_DESIGN.md` C4)
71///
72/// This function is deterministic: two accesses to the same logical directory
73/// (even via different relative paths) produce the same result when called from
74/// the same working directory. This prevents duplicate Project creation.
75///
76/// # Errors
77///
78/// Returns an error if `std::env::current_dir()` fails (extremely rare).
79///
80/// # Examples
81///
82/// ```
83/// use sqry_core::project::path_utils::absolutize_without_resolution;
84/// use std::path::Path;
85///
86/// // Relative paths are joined with CWD and normalized
87/// let result1 = absolutize_without_resolution(Path::new("./foo/../bar"));
88/// let result2 = absolutize_without_resolution(Path::new("bar"));
89/// // Both resolve to same path (when called from same CWD)
90/// ```
91pub fn absolutize_without_resolution(path: &Path) -> Result<PathBuf, io::Error> {
92 // Get current working directory
93 let cwd = std::env::current_dir()?;
94
95 // Join with path if relative
96 let absolute = if path.is_absolute() {
97 path.to_path_buf()
98 } else {
99 cwd.join(path)
100 };
101
102 // Normalize . and .. components (without touching filesystem)
103 let normalized = normalize_path_components(&absolute);
104
105 Ok(normalized)
106}
107
/// Normalize path components lexically (without filesystem access).
///
/// Handles `.` (current dir) and `..` (parent dir) components:
/// - `.` components are removed
/// - `..` removes the preceding normal component when there is one
/// - `..` directly after the root directory is dropped (the parent of the root
///   is the root itself)
/// - `..` that cannot be resolved lexically is kept: at the start of a relative
///   path, after another kept `..`, or after a drive prefix that has no root
///   (Windows drive-relative paths such as `C:..\foo`)
/// - Never produces an empty path (returns "." if the result would be empty)
///
/// Because symlinks are not consulted, the result may differ from what the
/// filesystem would resolve when a component before a `..` is a symlink.
///
/// # Platform Notes
///
/// - On Unix: Preserves leading `/`
/// - On Windows: Preserves drive prefix (`C:\`) and UNC paths
///
/// # Examples
///
/// ```
/// use sqry_core::project::path_utils::normalize_path_components;
/// use std::path::Path;
///
/// let path = Path::new("/home/user/../user/./project");
/// let normalized = normalize_path_components(path);
/// assert_eq!(normalized, Path::new("/home/user/project"));
/// ```
#[must_use]
pub fn normalize_path_components(path: &Path) -> PathBuf {
    // Components that survive normalization, in order.
    let mut kept: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            // `.` never changes the location.
            Component::CurDir => {}
            Component::ParentDir => match kept.last() {
                // `name/..` cancels out.
                Some(Component::Normal(_)) => {
                    kept.pop();
                }
                // `/..` is still `/`. `CurDir` is never stored in `kept`; it is
                // listed here only to keep the match exhaustive.
                Some(Component::RootDir) | Some(Component::CurDir) => {}
                // Nothing to cancel against: a relative path climbing above its
                // start, or a drive-relative path (`C:..`) whose base directory
                // is not known lexically. Dropping `..` here would silently
                // point at a different directory, so keep it.
                Some(Component::ParentDir) | Some(Component::Prefix(_)) | None => {
                    kept.push(component);
                }
            },
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                kept.push(component);
            }
        }
    }

    if kept.is_empty() {
        // Everything cancelled out (e.g. `foo/..`); an empty path is not usable.
        PathBuf::from(".")
    } else {
        kept.into_iter().collect()
    }
}
170
/// Directory names skipped by default while walking for repositories.
///
/// Each entry is a bare directory name (not a path) that is matched exactly.
/// The entries are typically large dependency, build-output, or cache
/// directories that rarely contain git repositories worth indexing.
///
/// Note: `.git` is intentionally NOT in this list (we need to detect it).
///
/// Users can override this list via configuration (see Phase 5: Configuration Integration).
pub const DEFAULT_IGNORED_DIRS: &[&str] = &[
    // Dependency trees and build output
    "node_modules",
    "target",
    "build",
    "dist",
    "vendor",
    // Generic and package-manager caches
    ".cache",
    ".npm",
    ".cargo",
    // Python caches and virtual environments
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".tox",
    ".venv",
    "venv",
    // Build-tool and editor state
    ".gradle",
    ".idea",
    ".vs",
    ".vscode",
];
199
200/// Check if a directory entry should be ignored during repository detection.
201///
202/// Per `02_DESIGN.md`, we skip common ignored directories to speed up walking.
203/// Uses [`DEFAULT_IGNORED_DIRS`] for the ignore list.
204///
205/// Note: `.git` directories are NOT ignored (we need to detect them).
206///
207/// # Arguments
208///
209/// * `name` - The directory name to check
210///
211/// # See Also
212///
213/// Use [`is_ignored_dir_with_config`] for custom ignore lists.
214#[must_use]
215pub fn is_ignored_dir(name: &std::ffi::OsStr) -> bool {
216 is_ignored_dir_with_config(name, DEFAULT_IGNORED_DIRS)
217}
218
/// Report whether a directory name appears in a caller-supplied ignore list.
///
/// This allows configuration of which directories to skip during repository
/// detection, for projects where the default list is not a good fit.
///
/// The comparison is an exact string match. A `name` that is not valid UTF-8
/// cannot equal any entry and is therefore never ignored.
///
/// # Arguments
///
/// * `name` - The directory name to check
/// * `ignored_dirs` - List of directory names to ignore
///
/// # Examples
///
/// ```
/// use sqry_core::project::path_utils::{is_ignored_dir_with_config, DEFAULT_IGNORED_DIRS};
/// use std::ffi::OsStr;
///
/// // Using custom ignore list
/// let custom_ignores = &["my_deps", "cached_stuff"];
/// assert!(is_ignored_dir_with_config(OsStr::new("my_deps"), custom_ignores));
/// assert!(!is_ignored_dir_with_config(OsStr::new("node_modules"), custom_ignores));
///
/// // Using default list
/// assert!(is_ignored_dir_with_config(OsStr::new("node_modules"), DEFAULT_IGNORED_DIRS));
/// ```
#[must_use]
pub fn is_ignored_dir_with_config(name: &std::ffi::OsStr, ignored_dirs: &[&str]) -> bool {
    // Only names representable as `str` can match; anything else falls through to `false`.
    matches!(name.to_str(), Some(utf8_name) if ignored_dirs.contains(&utf8_name))
}
252
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsStr;
    use std::path::{Path, PathBuf};

    /// Shorthand: lexically normalize a path given as a string literal.
    fn normalized(raw: &str) -> PathBuf {
        normalize_path_components(Path::new(raw))
    }

    #[test]
    fn test_normalize_removes_current_dir() {
        assert_eq!(
            normalized("/home/./user/./project"),
            Path::new("/home/user/project")
        );
    }

    #[test]
    fn test_normalize_resolves_parent_dir() {
        assert_eq!(
            normalized("/home/user/../other/project"),
            Path::new("/home/other/project")
        );
    }

    #[test]
    fn test_normalize_combined() {
        assert_eq!(
            normalized("/home/user/../user/./project/./src/../lib"),
            Path::new("/home/user/project/lib")
        );
    }

    #[test]
    fn test_normalize_preserves_root() {
        assert_eq!(normalized("/"), Path::new("/"));
    }

    #[test]
    fn test_normalize_relative_path() {
        assert_eq!(normalized("foo/../bar"), Path::new("bar"));
    }

    #[test]
    fn test_normalize_relative_above_start() {
        // A leading `..` has nothing to cancel against, so it must survive.
        assert_eq!(normalized("../foo"), Path::new("../foo"));
    }

    #[test]
    fn test_normalize_empty_result() {
        // Everything cancels out; the result is "." rather than an empty path.
        assert_eq!(normalized("foo/.."), Path::new("."));
    }

    #[test]
    fn test_absolutize_determinism() {
        // Per C4: two spellings of the same logical path, absolutized from the
        // same CWD, must agree.
        let via_detour = absolutize_without_resolution(Path::new("./foo/../bar")).unwrap();
        let direct = absolutize_without_resolution(Path::new("bar")).unwrap();
        assert_eq!(via_detour, direct);
    }

    #[test]
    fn test_absolutize_absolute_path_unchanged() {
        #[cfg(unix)]
        let path = Path::new("/absolute/path");
        #[cfg(windows)]
        let path = Path::new("C:\\absolute\\path");

        let absolutized = absolutize_without_resolution(path).unwrap();
        assert_eq!(absolutized, path);
    }

    #[test]
    fn test_canonicalize_existing_path() {
        // Relies on /tmp existing, which is only expected on Unix systems.
        #[cfg(unix)]
        {
            let canonical = canonicalize_path(Path::new("/tmp"));
            assert!(canonical.is_ok());
            assert!(canonical.unwrap().is_absolute());
        }
    }

    #[test]
    fn test_canonicalize_nonexistent_path_uses_fallback() {
        // The path does not exist, so success can only come from the fallback.
        let outcome = canonicalize_path(Path::new("/nonexistent/deeply/nested/path"));
        assert!(outcome.is_ok());

        let resolved = outcome.unwrap();
        assert!(resolved.is_absolute());
        // The original components must still be present after normalization.
        assert!(resolved.to_string_lossy().contains("nonexistent"));
    }

    #[test]
    fn test_is_ignored_dir() {
        for &name in &["node_modules", "target", "__pycache__"] {
            assert!(is_ignored_dir(OsStr::new(name)), "{} should be ignored", name);
        }

        // `.git` must stay visible (it is what detection looks for), as must
        // ordinary source directories.
        for &name in &[".git", "src", "lib"] {
            assert!(
                !is_ignored_dir(OsStr::new(name)),
                "{} should not be ignored",
                name
            );
        }
    }

    #[test]
    fn test_is_ignored_dir_with_config_custom_list() {
        let custom_ignores: &[&str] = &["my_deps", "cached_stuff", "third_party"];

        // Every entry of the custom list is ignored.
        for &name in &["my_deps", "cached_stuff", "third_party"] {
            assert!(
                is_ignored_dir_with_config(OsStr::new(name), custom_ignores),
                "{} should be ignored",
                name
            );
        }

        // Default entries that are absent from the custom list are not ignored,
        // and neither are ordinary directories or `.git`.
        for &name in &["node_modules", "target", "src", ".git"] {
            assert!(
                !is_ignored_dir_with_config(OsStr::new(name), custom_ignores),
                "{} should not be ignored",
                name
            );
        }
    }

    #[test]
    fn test_is_ignored_dir_with_config_empty_list() {
        // With nothing on the list, nothing can be ignored.
        let empty: &[&str] = &[];

        for &name in &["node_modules", "target", "src"] {
            assert!(
                !is_ignored_dir_with_config(OsStr::new(name), empty),
                "{} should not be ignored",
                name
            );
        }
    }

    #[test]
    fn test_is_ignored_dir_with_config_default_list() {
        // The convenience wrapper and the explicit default list must agree.
        for &name in &["node_modules", "src"] {
            let dir = OsStr::new(name);
            assert_eq!(
                is_ignored_dir(dir),
                is_ignored_dir_with_config(dir, DEFAULT_IGNORED_DIRS)
            );
        }
    }

    #[test]
    fn test_default_ignored_dirs_contains_common_dirs() {
        for &name in &[
            "node_modules",
            "target",
            "vendor",
            "__pycache__",
            ".venv",
            ".idea",
        ] {
            assert!(
                DEFAULT_IGNORED_DIRS.contains(&name),
                "{} missing from defaults",
                name
            );
        }

        // `.git` must never be part of the defaults.
        assert!(!DEFAULT_IGNORED_DIRS.contains(&".git"));
    }

    #[cfg(unix)]
    #[test]
    fn test_canonicalize_symlink() {
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let scratch = TempDir::new().unwrap();
        let real_dir = scratch.path().join("target_dir");
        let alias = scratch.path().join("link");

        std::fs::create_dir(&real_dir).unwrap();
        symlink(&real_dir, &alias).unwrap();

        // Going through the symlink must land on the same canonical path as
        // going to the directory directly.
        let through_link = canonicalize_path(&alias).unwrap();
        let direct = canonicalize_path(&real_dir).unwrap();
        assert_eq!(through_link, direct);
    }

    #[cfg(unix)]
    #[test]
    fn test_canonicalize_broken_symlink_uses_fallback() {
        use std::os::unix::fs::symlink;
        use tempfile::TempDir;

        let scratch = TempDir::new().unwrap();
        let dangling = scratch.path().join("broken_link");

        // The link target does not exist, so filesystem canonicalization fails.
        symlink("/nonexistent/target", &dangling).unwrap();

        let outcome = canonicalize_path(&dangling);
        assert!(outcome.is_ok());
        // The fallback yields the absolutized path of the link itself.
        assert!(outcome.unwrap().is_absolute());
    }
}
492}