soft_canonicalize/lib.rs
1//! # soft-canonicalize
2//!
3//! **Path canonicalization that works with non-existing paths.**
4//!
5//! Rust implementation inspired by Python 3.6+ `pathlib.Path.resolve(strict=False)`, providing
6//! the same functionality as `std::fs::canonicalize` (Rust's equivalent to Unix `realpath()`)
7//! but extended to handle non-existing paths, with optional features for simplified Windows
8//! output (`dunce`) and virtual filesystem semantics (`anchored`).
9//!
10//! ## Why Use This?
11//!
12//! - **🚀 Works with non-existing paths** - Plan file locations before creating them
13//! - **⚡ Fast** - Optimized performance with minimal allocations and syscalls
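//! - **✅ Compatible** - 100% behavioral match with `std::fs::canonicalize` for existing paths (except Linux `/proc/PID/root` magic symlinks, which the default `proc-canonicalize` backend deliberately preserves), with optional UNC simplification via `dunce` feature (Windows)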
15//! - **🎯 Virtual filesystem support** - Optional `anchored` feature for bounded canonicalization within directory boundaries
16//! - **🔒 Robust** - 495 comprehensive tests covering edge cases and security scenarios
17//! - **🛡️ Safe traversal** - Proper `..` and symlink resolution with cycle detection
18//! - **🌍 Cross-platform** - Windows, macOS, Linux with comprehensive UNC/symlink handling
19//! - **🔧 Zero dependencies** - Optional features may add minimal dependencies
20//!
21//! ## Lexical vs. Filesystem-Based Resolution
22//!
23//! Path resolution libraries fall into two categories:
24//!
25//! **Lexical Resolution** (no I/O):
26//! - **Performance**: Fast - no filesystem access
27//! - **Accuracy**: Incorrect if symlinks are present (doesn't resolve them)
28//! - **Use when**: You're 100% certain no symlinks exist and need maximum performance
29//! - **Examples**: `std::path::absolute`, `normpath::normalize`
30//!
31//! **Filesystem-Based Resolution** (performs I/O):
32//! - **Performance**: Slower - requires filesystem syscalls to resolve symlinks
33//! - **Accuracy**: Correct - follows symlinks to their targets
//! - **Use when**: Safety takes priority over performance, or symlinks may be present
35//! - **Examples**: `std::fs::canonicalize`, `soft_canonicalize`, `dunce::canonicalize`
36//!
37//! **Rule of thumb**: If you cannot guarantee symlinks won't be introduced, or if correctness is critical, use filesystem-based resolution.
38//!
39//! ## Use Cases
40//!
41//! ### Path Comparison
42//!
43//! - **Equality**: Determine if two different path strings point to the same location
44//! - **Containment**: Check if one path is inside another directory
45//!
46//! ### Common Applications
47//!
48//! - **Build Systems**: Resolve output paths during build planning before directories exist
49//! - **Configuration Validation**: Ensure user-provided paths stay within allowed boundaries
50//! - **Deduplication**: Detect when different path strings refer to the same planned location
51//! - **Cross-Platform Normalization**: Handle Windows UNC paths and symlinks consistently
52//!
53//! ## Quick Start
54//!
55//! ```toml
56//! [dependencies]
57//! soft-canonicalize = "0.5"
58//! ```
59//!
60//! ### Basic Example
61//!
62//! ```rust
63//! # #[cfg(windows)]
64//! # {
65//! use soft_canonicalize::soft_canonicalize;
66//!
67//! let non_existing_path = r"C:\Users\user\documents\..\non\existing\config.json";
68//!
69//! // Using Rust's own std canonicalize function:
70//! let result = std::fs::canonicalize(non_existing_path);
71//! assert!(result.is_err());
72//!
73//! // Using our crate's function:
74//! let result = soft_canonicalize(non_existing_path);
75//! assert!(result.is_ok());
76//!
77//! // Shows the UNC path conversion and path normalization
78//! # #[cfg(not(feature = "dunce"))]
79//! assert_eq!(
80//! result.unwrap().to_string_lossy(),
81//! r"\\?\C:\Users\user\non\existing\config.json"
82//! );
83//!
84//! // With `dunce` feature enabled, paths are simplified when safe
85//! # #[cfg(feature = "dunce")]
86//! assert_eq!(
87//! result.unwrap().to_string_lossy(),
88//! r"C:\Users\user\non\existing\config.json"
89//! );
90//! # }
91//! # Ok::<(), std::io::Error>(())
92//! ```
93//!
94//! ## Optional Features
95//!
96//! ### Anchored Canonicalization (`anchored` feature)
97//!
98//! For **correct symlink resolution within virtual/constrained directory spaces**, use
99//! `anchored_canonicalize`. This function implements true virtual filesystem semantics by
100//! clamping ALL paths (including absolute symlink targets) to the anchor directory:
101//!
102//! ```toml
103//! [dependencies]
104//! soft-canonicalize = { version = "0.5", features = ["anchored"] }
105//! ```
106//!
107//! ```rust
108//! # #[cfg(feature = "anchored")]
109//! use soft_canonicalize::anchored_canonicalize;
110//! # #[cfg(not(feature = "anchored"))]
111//! # use soft_canonicalize::soft_canonicalize;
112//! use std::fs;
113//!
114//! # fn example() -> Result<(), std::io::Error> {
115//! // Set up an anchor/root directory (no need to pre-canonicalize)
116//! let anchor = std::env::temp_dir().join("workspace_root");
117//! fs::create_dir_all(&anchor)?;
118//!
119//! // Canonicalize paths relative to the anchor (anchor is soft-canonicalized internally)
120//! # #[cfg(feature = "anchored")]
121//! let resolved_path = anchored_canonicalize(&anchor, "../../../etc/passwd")?;
122//! # #[cfg(not(feature = "anchored"))]
123//! # { let _ = (&anchor, "../../../etc/passwd"); }
124//! // Result: /tmp/workspace_root/etc/passwd (lexical .. clamped to anchor)
125//!
126//! // Absolute symlinks are also clamped to the anchor
127//! // If there's a symlink: workspace_root/config -> /etc/config
128//! // It resolves to: workspace_root/etc/config (clamped to anchor)
129//! # #[cfg(feature = "anchored")]
130//! let symlink_path = anchored_canonicalize(&anchor, "config")?;
131//! # #[cfg(not(feature = "anchored"))]
132//! # { let _ = "config"; }
133//! // Safe: always stays within workspace_root, even if symlink points to /etc/config
134//! # Ok(())
135//! # }
136//! ```
137//!
138//! **Key features:**
139//! - Virtual filesystem semantics: All absolute paths (including symlink targets) are clamped to anchor
140//! - Anchor-relative canonicalization: Resolves paths relative to a specific anchor directory
141//! - Complete symlink clamping: Follows symlink chains with clamping at each step
142//! - Component-by-component: Processes path components in proper order
143//! - Absolute results: Always returns absolute canonical paths within the anchor boundary
144//!
145//! **For a complete multi-tenant security example**, run:
146//! ```bash
147//! cargo run --example virtual_filesystem_demo --features anchored
148//! ```
149//!
150//! ### Simplified Path Output (`dunce` feature, Windows-only)
151//!
152//! By default, `soft_canonicalize` returns Windows paths in extended-length UNC format
153//! (`\\?\C:\foo`) for maximum robustness and compatibility with long paths, reserved names,
154//! and other Windows filesystem edge cases.
155//!
156//! If you need simplified paths (`C:\foo`) for compatibility with legacy applications or
157//! user-facing output, enable the **`dunce` feature**:
158//!
159//! ```toml
160//! [dependencies]
161//! soft-canonicalize = { version = "0.5", features = ["dunce"] }
162//! ```
163//!
164//! **Example:**
165//!
166//! ```rust
167//! use soft_canonicalize::soft_canonicalize;
168//! # fn example() -> Result<(), std::io::Error> {
169//! # #[cfg(windows)]
170//! # {
171//! let path = soft_canonicalize(r"C:\Users\user\documents\..\config.json")?;
172//!
173//! // Without dunce feature (default):
174//! // Returns: \\?\C:\Users\user\config.json (extended-length UNC)
175//!
176//! // With dunce feature enabled:
177//! // Returns: C:\Users\user\config.json (simplified when safe)
178//! # }
179//! # Ok(())
180//! # }
181//! ```
182//!
183//! **When to use:**
184//! - ✅ Legacy applications that don't support UNC paths
185//! - ✅ User-facing output requiring familiar path format
186//! - ✅ Tools expecting traditional Windows path format
187//!
188//! **How it works:**
189//!
190//! The [dunce](https://crates.io/crates/dunce) crate intelligently simplifies Windows UNC paths
191//! (`\\?\C:\foo` → `C:\foo`) **only when safe**:
192//! - Automatically keeps UNC for paths >260 chars
193//! - Automatically keeps UNC for reserved names (CON, PRN, NUL, COM1-9, LPT1-9)
194//! - Automatically keeps UNC for paths with trailing spaces/dots
195//! - Automatically keeps UNC for paths containing `..` (literal interpretation)
196//!
197//! ## When Paths Must Exist: `proc-canonicalize`
198//!
199//! Since v0.5.0, `soft_canonicalize` uses [`proc-canonicalize`](https://crates.io/crates/proc-canonicalize)
200//! by default for existing-path canonicalization instead of `std::fs::canonicalize`. This fixes a
201//! critical issue with Linux namespace boundaries.
202//!
203//! **The Problem**: On Linux, `std::fs::canonicalize` resolves "magic symlinks" like `/proc/PID/root`
204//! to their targets, losing the namespace boundary:
205//!
206//! ```rust
207//! # #[cfg(all(target_os = "linux", feature = "proc-canonicalize"))]
208//! # fn main() -> std::io::Result<()> {
209//! // /proc/self/root is a "magic symlink" pointing to the current process's root filesystem
210//! // std::fs::canonicalize incorrectly resolves it to "/"
211//! let std_result = std::fs::canonicalize("/proc/self/root")?;
212//! assert_eq!(std_result.to_string_lossy(), "/"); // Wrong! Namespace boundary lost
213//!
214//! // proc_canonicalize preserves the namespace boundary
215//! let proc_result = proc_canonicalize::canonicalize("/proc/self/root")?;
216//! assert_eq!(proc_result.to_string_lossy(), "/proc/self/root"); // Correct!
217//! # Ok(())
218//! # }
219//! # #[cfg(not(all(target_os = "linux", feature = "proc-canonicalize")))]
220//! # fn main() {}
221//! ```
222//!
223//! **Recommendation**: If you need to canonicalize paths that **must exist** (and would previously
224//! use `std::fs::canonicalize`), use `proc_canonicalize::canonicalize` for correct Linux namespace
225//! handling:
226//!
227//! ```toml
228//! [dependencies]
229//! proc-canonicalize = "0.0"
230//! ```
231//!
232//! ## Security & CVE Coverage
233//!
234//! Security does not depend on enabling features. The core API is secure-by-default; the optional
235//! `anchored` feature is a convenience for virtual roots. We test all modes (no features;
236//! `--features anchored`; `--features anchored,dunce`).
237//!
238//! **Built-in protections include:**
239//! - **NTFS Alternate Data Stream (ADS) validation** - Blocks malicious stream placements and traversal attempts
240//! - **Symlink cycle detection** - Bounded depth tracking prevents infinite loops
241//! - **Path traversal clamping** - Never ascends past root/share/device boundaries
242//! - **Null byte rejection** - Early validation prevents injection attacks
243//! - **UNC/device semantics** - Preserves Windows extended-length and device namespace integrity
244//! - **TOCTOU race resistance** - Tested against time-of-check-time-of-use attacks
245//!
246//! See [`docs/SECURITY.md`](https://github.com/DK26/soft-canonicalize-rs/blob/dev/docs/SECURITY.md)
247//! for detailed analysis, attack scenarios, and test references.
248//!
249//! ## Cross-Platform Notes
250//!
251//! - Windows: returns extended-length verbatim paths for absolute results (`\\?\C:\…`, `\\?\UNC\…`)
252//! - With `dunce` feature: returns simplified paths (`C:\…`) when safe
253//! - Unix-like systems: standard absolute and relative path semantics
254//! - UNC floors and device namespaces are preserved and respected
255//!
256//! ## Testing
257//!
258//! 495 tests including:
259//! - std::fs::canonicalize compatibility tests (existing paths)
260//! - Path traversal and robustness tests
261//! - Python pathlib-inspired behavior checks
262//! - Platform-specific cases (Windows/macOS/Linux)
263//! - Symlink semantics and cycle detection
264//! - Windows-specific UNC, 8.3, and ADS validation
265//! - Anchored canonicalization tests (with `anchored` feature)
266//!
267//! ## Known Limitation (Windows 8.3)
268//!
269//! On Windows, for non-existing paths we cannot determine equivalence between a short (8.3)
270//! name and its long form. Existing paths are canonicalized to the same result.
271//!
272//! ```rust
273//! use soft_canonicalize::soft_canonicalize;
274//! # fn example() -> Result<(), std::io::Error> {
275//! # #[cfg(windows)]
276//! # {
277//! let short_form = soft_canonicalize("C:/PROGRA~1/MyApp/config.json")?;
278//! let long_form = soft_canonicalize("C:/Program Files/MyApp/config.json")?;
279//! assert_ne!(short_form, long_form); // for non-existing suffixes
280//! # }
281//! # Ok(())
282//! # }
283//! ```
284//!
285//! ## How It Works
286//!
287//! For those interested in the implementation details, here's how `soft_canonicalize` processes paths:
288//!
289//! 1. Input validation (empty path, platform pre-checks)
290//! 2. Convert to absolute path (preserving drive/root semantics)
291//! 3. Fast-path: try `fs::canonicalize` on the original absolute path
292//! 4. Lexically normalize `.` and `..` (fast-path optimization for whole-path existence check)
293//! 5. Fast-path: try `fs::canonicalize` on the normalized path when different
294//! 6. Validate null bytes (platform-specific)
295//! 7. Discover deepest existing prefix with **symlink-first** semantics: resolve symlinks incrementally, then process `.` and `..` relative to resolved targets
296//! 8. Optionally canonicalize the anchor (if symlinks seen) and rebuild
297//! 9. Append non-existing suffix lexically, then normalize if needed
298//! 10. Windows: ensure extended-length prefix for absolute paths
299//! 11. Optional: simplify Windows paths when `dunce` feature enabled
300
301mod error;
302mod normalize;
303mod prefix;
304mod symlink;
305#[cfg(windows)]
306mod windows;
307
308pub use error::{IoErrorPathExt, SoftCanonicalizeError};
309pub use symlink::MAX_SYMLINK_DEPTH;
310
311use crate::error::error_with_path;
312use crate::normalize::simple_normalize_path;
313use crate::prefix::compute_existing_prefix;
314#[cfg(windows)]
315use crate::windows::{
316 ensure_windows_extended_prefix, has_windows_short_component, is_incomplete_unc,
317 validate_windows_ads_layout,
318};
319
320use std::io;
321use std::path::{Path, PathBuf};
322
323// Canonicalization backend selection (priority order):
324// 1. proc-canonicalize feature (default): fixes Linux /proc/PID/root magic symlinks,
325// and delegates to dunce when both features are enabled
326// 2. dunce feature on Windows (without proc-canonicalize): uses dunce::canonicalize
327// 3. fallback: uses std::fs::canonicalize
328#[cfg(feature = "proc-canonicalize")]
329use proc_canonicalize::canonicalize as fs_canonicalize;
330
331#[cfg(all(not(feature = "proc-canonicalize"), feature = "dunce", windows))]
332use dunce::canonicalize as fs_canonicalize;
333
334#[cfg(all(
335 not(feature = "proc-canonicalize"),
336 not(all(feature = "dunce", windows))
337))]
338use std::fs::canonicalize as fs_canonicalize;
339
/// Reports whether `p` holds an embedded NUL.
///
/// - Unix: scans the raw bytes of the path.
/// - Windows: scans the UTF-16 code units of the path.
/// - Any other platform: no check is performed and `false` is always returned.
#[inline]
fn path_contains_nul(p: &Path) -> bool {
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        let raw_bytes = p.as_os_str().as_bytes();
        raw_bytes.contains(&0u8)
    }
    #[cfg(windows)]
    {
        use std::os::windows::ffi::OsStrExt;
        let mut wide_units = p.as_os_str().encode_wide();
        wide_units.any(|unit| unit == 0u16)
    }
    #[cfg(not(any(unix, windows)))]
    {
        // No platform-specific view of the raw path is used here, so nothing is inspected.
        let _ = p;
        false
    }
}
358
359#[inline]
360fn reject_nul_bytes(p: &Path) -> io::Result<()> {
361 if path_contains_nul(p) {
362 return Err(error_with_path(
363 io::ErrorKind::InvalidInput,
364 p,
365 "path contains null byte",
366 ));
367 }
368 Ok(())
369}
370
371/// Performs "soft" canonicalization on a path.
372///
373/// Unlike `std::fs::canonicalize()`, this function works with non-existent paths by:
374/// 1. Finding the deepest existing ancestor directory
375/// 2. Canonicalizing that existing part (resolving symlinks, normalizing case, etc.)
376/// 3. Appending the non-existing path components to the canonicalized base
377///
378/// This provides canonicalization benefits (symlink resolution, path normalization)
379/// without requiring the entire path to exist.
380///
381/// # Output Format
382///
383/// **Without `dunce` feature (default):**
384/// - Windows: Returns extended-length UNC paths (`\\?\C:\foo`) for maximum robustness
385/// - Unix: Returns standard absolute paths (`/foo`)
386///
387/// **With `dunce` feature enabled:**
388/// - Windows: Returns simplified paths (`C:\foo`) when safe to do so
389/// - Unix: Returns standard absolute paths (`/foo`) - no change
390///
391/// See the [module documentation](crate#optional-features) for details on the `dunce` feature.
392#[must_use = "this function returns a new PathBuf without modifying the input"]
393#[doc(alias = "realpath")]
394#[doc(alias = "canonicalize")]
395#[doc(alias = "resolve")]
396#[doc(alias = "absolute")]
397pub fn soft_canonicalize(path: impl AsRef<Path>) -> io::Result<PathBuf> {
398 let path = path.as_ref();
399
400 // Stage 0: guard-rail — handle empty path early (aligns with std::fs::canonicalize)
401 if path.as_os_str().is_empty() {
402 return Err(error_with_path(
403 io::ErrorKind::NotFound,
404 path,
405 "The system cannot find the path specified.",
406 ));
407 }
408
409 // Windows-only: explicit guard — reject incomplete UNC roots (\\server without a share)
410 #[cfg(windows)]
411 {
412 if is_incomplete_unc(path) {
413 return Err(error_with_path(
414 io::ErrorKind::InvalidInput,
415 path,
416 "invalid UNC path: missing share",
417 ));
418 }
419 }
420
421 // Stage 1: convert to absolute path (preserves drive/root semantics)
422 let absolute_path = if path.is_absolute() {
423 path.to_path_buf()
424 } else {
425 std::env::current_dir()?.join(path)
426 };
427
428 // Windows-only EARLY ADS validation (before lexical normalization)
429 #[cfg(windows)]
430 validate_windows_ads_layout(&absolute_path)?;
431
432 // Stage 1.5: fast-path — attempt std canonicalize on the ORIGINAL absolute path first.
433 match fs_canonicalize(&absolute_path) {
434 Ok(p) => return Ok(p),
435 Err(e) => match e.kind() {
436 io::ErrorKind::NotFound => { /* continue to boundary detection */ }
437 io::ErrorKind::InvalidInput | io::ErrorKind::PermissionDenied => return Err(e),
438 _ => { /* continue to optimized boundary detection */ }
439 },
440 }
441
442 // Stage 2: pre-normalize lexically (resolve . and .. without touching the filesystem)
443 let normalized_path = simple_normalize_path(&absolute_path);
444
445 // Windows-only LATE ADS validation (defense in depth after normalization)
446 #[cfg(windows)]
447 validate_windows_ads_layout(&normalized_path)?;
448
449 // Stage 3: fast-path — try fs::canonicalize on the lexically-normalized path as well
450 if normalized_path != absolute_path {
451 match fs_canonicalize(&normalized_path) {
452 Ok(p) => return Ok(p),
453 Err(e) => match e.kind() {
454 io::ErrorKind::NotFound => { /* fall through to optimized boundary detection */ }
455 io::ErrorKind::InvalidInput | io::ErrorKind::PermissionDenied => return Err(e),
456 _ => { /* fall through to optimized boundary detection */ }
457 },
458 }
459 }
460 // At this point: path doesn't fully exist or canonicalize returned a recoverable error — continue.
461
462 // Stage 3.1: sanity check — validate no embedded NUL bytes (platform-specific)
463 reject_nul_bytes(path)?;
464
465 // Stage 4: collect path components efficiently (root/prefix vs normal names)
466 let mut components = Vec::new();
467 let mut root_prefix = PathBuf::new();
468
469 for component in absolute_path.components() {
470 match component {
471 std::path::Component::RootDir | std::path::Component::Prefix(_) => {
472 root_prefix.push(component.as_os_str());
473 }
474 std::path::Component::Normal(name) => {
475 components.push(name.to_os_string());
476 }
477 // Don't allocate new OsStrings for . and .. - we'll handle them specially
478 std::path::Component::CurDir => components.push(std::ffi::OsString::from(".")),
479 std::path::Component::ParentDir => components.push(std::ffi::OsString::from("..")),
480 }
481 }
482
483 // Stage 5: discover the deepest existing prefix and resolve symlinks inline as encountered
484 let (existing_prefix, existing_count, symlink_seen) =
485 compute_existing_prefix(&root_prefix, &components)?;
486
487 // Stage 6: Build the base result. Only canonicalize the deepest existing ancestor
488 // when needed (e.g., symlink encountered).
489 let mut base = existing_prefix;
490 if existing_count > 0 && symlink_seen {
491 // Identify deepest existing anchor (defensive in case base points at a symlink whose target doesn't exist)
492 let mut anchor = base.as_path();
493 while !anchor.exists() {
494 if let Some(p) = anchor.parent() {
495 anchor = p;
496 } else {
497 break;
498 }
499 }
500 if anchor.exists() {
501 if let Ok(canon_anchor) = fs_canonicalize(anchor) {
502 // Rebuild base as: canonicalized anchor + relative suffix
503 let suffix = base.strip_prefix(anchor).ok();
504 let mut rebuilt = canon_anchor;
505 if let Some(suf) = suffix {
506 rebuilt.push(suf);
507 }
508 base = rebuilt;
509 }
510 }
511 }
512
513 // Windows-only: Expand short-name component if no symlink encountered but base has 8.3 component
514 #[cfg(windows)]
515 {
516 if !symlink_seen && existing_count > 0 && has_windows_short_component(&base) {
517 if let Ok(canon_base) = fs_canonicalize(&base) {
518 base = canon_base;
519 }
520 }
521 }
522
523 let mut result = base;
524
525 // Stage 7: append the non-existing suffix components (purely lexical)
526 let mut suffix_has_dot_or_dotdot = false;
527 for component in components.iter().skip(existing_count) {
528 // Use OsStr comparison instead of creating new OsStr instances
529 if !suffix_has_dot_or_dotdot {
530 let comp_str = component.as_os_str();
531 if comp_str == "." || comp_str == ".." {
532 suffix_has_dot_or_dotdot = true;
533 }
534 }
535 result.push(component);
536 }
537
538 // After we have a fully-resolved base, normalize lexically.
539 // Note: When dunce feature is enabled AND path is verbatim, skip normalization
540 // so dunce can see the raw structure and make correct safety decisions
541 #[cfg(windows)]
542 {
543 #[cfg(feature = "dunce")]
544 {
545 use std::path::{Component, Prefix};
546 let should_normalize = !matches!(
547 result.components().next(),
548 Some(Component::Prefix(p)) if matches!(
549 p.kind(),
550 Prefix::Verbatim(_) | Prefix::VerbatimDisk(_) | Prefix::VerbatimUNC(_, _)
551 )
552 );
553 if should_normalize {
554 result = simple_normalize_path(&result);
555 }
556 }
557 #[cfg(not(feature = "dunce"))]
558 {
559 result = simple_normalize_path(&result);
560 }
561 }
562 #[cfg(not(windows))]
563 {
564 if suffix_has_dot_or_dotdot {
565 result = simple_normalize_path(&result);
566 }
567 }
568
569 // Stage 8 (Windows): ensure extended-length prefix for absolute paths
570 // We always add \\?\ for robustness, then let dunce decide whether to strip it (if enabled)
571 #[cfg(windows)]
572 {
573 use std::path::{Component, Prefix};
574 if let Some(Component::Prefix(pr)) = result.components().next() {
575 match pr.kind() {
576 Prefix::Verbatim(_) | Prefix::VerbatimDisk(_) | Prefix::VerbatimUNC(_, _) => { /* already extended */
577 }
578 Prefix::Disk(_) | Prefix::UNC(_, _) => {
579 result = ensure_windows_extended_prefix(&result);
580 }
581 Prefix::DeviceNS(_) => { /* leave as-is */ }
582 }
583 }
584 }
585
586 // Stage 9 (Optional): dunce feature - simplify paths to legacy format when safe
587 // dunce::simplified() intelligently strips \\?\ only when safe (no reserved names,
588 // path length ok, no .., etc.). It performs no I/O and handles non-existing paths correctly.
589 #[cfg(all(feature = "dunce", windows))]
590 {
591 result = dunce::simplified(&result).to_path_buf();
592 }
593
594 Ok(result)
595}
596
597/// Canonicalize a user-provided path relative to an anchor directory, with virtual filesystem semantics.
598///
599/// This function resolves paths **as if rooted under a given anchor**, performing canonical path
600/// resolution relative to the anchor instead of the current working directory. All paths, including
601/// absolute symlink targets, are clamped to the anchor, implementing true virtual filesystem behavior.
602///
603/// ## Behavior Overview
604/// - Treats `input` as if rooted under `anchor` (strips root/prefix markers from `input`)
605/// - Expands symlinks as encountered (component-by-component), applying `..` after expansion
606/// - **Clamps ALL paths to the `anchor` boundary**, including:
607/// - Lexical `..` traversal in user input
608/// - **All absolute symlink targets** (both within and outside anchor - see below)
609/// - Chained symlinks with mixed absolute and relative targets
610/// - Bounded symlink following with cycle-defense, consistent with `MAX_SYMLINK_DEPTH`
611/// - Mirrors input validations from `soft_canonicalize` (null-byte checks, Windows ADS layout)
612///
613/// ## Absolute Symlink Clamping (Critical Behavior)
614///
615/// When a symlink points to an absolute path, it is **always clamped to the anchor**,
616/// implementing true virtual filesystem semantics. This happens in two cases:
617///
618/// **Case 1: Symlink within anchor** (host-style path)
619/// - Example: Symlink `/tmp/anchor/link` → `/tmp/anchor/docs/file`
620/// - The target already expresses the full host path including the anchor
621/// - Process: Strip anchor prefix, then rejoin to anchor
622/// - Result: `/tmp/anchor/docs/file` (stays within anchor)
623///
624/// **Case 2: Symlink outside anchor** (virtual-style path)
625/// - Example: Symlink `/tmp/anchor/link` → `/etc/passwd`
626/// - The target is an absolute path outside the anchor
627/// - Process: Strip root prefix (`/`), then join to anchor
628/// - Result: `/tmp/anchor/etc/passwd` (clamped to anchor)
629///
630/// In both cases, the anchor acts as a **virtual root** (`/`), similar to chroot behavior.
631/// This ensures symlinks cannot escape the anchor boundary, regardless of where they point.
632///
633/// ## Features
634/// - **Anchored resolution**: Interprets paths relative to a specific anchor directory
635/// - **Virtual filesystem semantics**: Clamps all absolute paths (including symlink targets) to anchor
636/// - **Symlink canonicalization**: Follows symlink chains with clamping at each step
637/// - **Input validation**: Rejects null bytes, malformed UNC paths, and empty paths
638/// - **Cycle detection**: Prevents infinite symlink loops with configurable depth limits
639///
640/// ## Use Cases
641/// - **Virtual filesystem implementations**: Provides correct symlink resolution behavior
642/// when operating within virtual/constrained directory spaces
643/// - **Containerized environments**: Ensures symlinks resolve properly relative to a virtual root
644/// - **Chroot-like scenarios**: Maintains correct path semantics within bounded directory trees
645/// - **Build systems**: Resolving paths relative to project roots with proper symlink handling
646/// - **Applications needing anchor-relative interpretation**: Consistent path resolution
647/// relative to a base directory while preserving symlink semantics
648/// - **Path sandboxing**: Building higher-level path processing APIs with controlled resolution scope
649///
650/// ## Output Format
651///
652/// The output format follows the same rules as [`soft_canonicalize`]:
653/// - **Without `dunce` feature (default)**: Windows returns extended-length UNC paths (`\\?\C:\foo`)
654/// - **With `dunce` feature enabled**: Windows returns simplified paths (`C:\foo`) when safe
655/// - Unix systems always return standard absolute paths
656///
657/// ## Notes
658/// - The `anchor` is canonicalized (soft) first; the result is absolute
659/// - For fully-existing final paths, this typically matches `std::fs::canonicalize` of the
660/// resolved path; however, semantics differ because `input` is interpreted relative to `anchor`
661/// - Enable with `--features anchored` (optional feature to keep core library lightweight)
662///
663/// ## Example
664/// ```
665/// use soft_canonicalize::{anchored_canonicalize, soft_canonicalize};
666/// use std::fs;
667///
668/// # fn demo() -> Result<(), std::io::Error> {
669/// let anchor = std::env::temp_dir().join("sc_anchor_demo").join("root");
670/// fs::create_dir_all(&anchor)?;
671///
672/// let base = soft_canonicalize(&anchor)?;
673///
674/// // Absolute input paths are clamped to anchor
675/// let out = anchored_canonicalize(&base, "/etc/passwd")?;
676/// assert_eq!(out, base.join("etc").join("passwd"));
677///
678/// // Lexical .. traversal is also clamped
679/// let out2 = anchored_canonicalize(&base, "../../../etc/passwd")?;
680/// assert_eq!(out2, base.join("etc").join("passwd"));
681/// # Ok(())
682/// # }
683/// # demo().unwrap();
684/// ```
685///
686/// ## Symlink Clamping Example
687/// ```
688/// # #[cfg(unix)]
689/// # fn demo() -> Result<(), std::io::Error> {
690/// use soft_canonicalize::{anchored_canonicalize, soft_canonicalize};
691/// use std::os::unix::fs::symlink;
692/// use std::fs;
693///
694/// let anchor = std::env::temp_dir().join("sc_symlink_demo2").join("root");
695/// fs::create_dir_all(&anchor)?;
696/// let base = soft_canonicalize(&anchor)?;
697///
698/// // Create a symlink pointing to absolute path outside anchor
699/// let external_path = std::env::temp_dir().join("external_data2");
700/// fs::create_dir_all(&external_path)?;
701/// let link_path = base.join("mylink");
702/// let _ = fs::remove_file(&link_path); // Clean up if exists
703/// symlink(&external_path, &link_path)?;
704///
705/// // The absolute symlink target is CLAMPED to the anchor
706/// let result = anchored_canonicalize(&base, "mylink")?;
707/// // Result stays within anchor (virtual filesystem semantics)
708/// assert!(result.starts_with(&base));
709/// # Ok(())
710/// # }
711/// # #[cfg(unix)]
712/// # demo().unwrap();
713/// ```
#[must_use = "this function returns a new PathBuf without modifying the input"]
#[doc(alias = "chroot")]
#[doc(alias = "jail")]
#[doc(alias = "sandbox")]
#[doc(alias = "virtual_root")]
#[cfg(feature = "anchored")]
#[cfg_attr(docsrs, doc(cfg(feature = "anchored")))]
pub fn anchored_canonicalize(
    anchor: impl AsRef<Path>,
    input: impl AsRef<Path>,
) -> io::Result<PathBuf> {
    // Overview of the steps below:
    //   1. Validate `anchor` / `input` (empty anchor, NUL bytes, incomplete UNC on Windows).
    //   2. Soft-canonicalize the anchor; the result is both the starting point and the
    //      floor that the walk never climbs above.
    //   3. Walk `input` component by component: root/prefix components are ignored, `..`
    //      is clamped at the floor, and every existing symlink is resolved through the
    //      anchor-aware resolver, then re-rooted under the anchor if it still escapes.
    //   4. On Windows, re-validate the ADS layout and normalize the prefix of the result.
    //
    // Errors: `NotFound` for an empty anchor, `InvalidInput` for an incomplete UNC anchor
    // (Windows), plus whatever the validation, canonicalization and symlink helpers return.
    let anchor = anchor.as_ref();
    let input = input.as_ref();

    // Basic input validation (empty paths)
    if anchor.as_os_str().is_empty() {
        return Err(error_with_path(
            io::ErrorKind::NotFound,
            anchor,
            "anchor path is empty",
        ));
    }

    // Reject NULs (platform-specific)
    reject_nul_bytes(anchor)?;
    reject_nul_bytes(input)?;

    // Windows-only: reject incomplete UNC anchors early
    #[cfg(windows)]
    {
        if is_incomplete_unc(anchor) {
            return Err(error_with_path(
                io::ErrorKind::InvalidInput,
                anchor,
                "invalid UNC path: missing share",
            ));
        }
    }

    // On Windows, treat drive-relative anchors (e.g., "C:dir") as absolute anchors ("C:\\dir").
    // Anchors act as virtual roots and should not depend on the process's per-drive cwd.
    #[cfg(windows)]
    let anchor = {
        use std::path::{Component, Prefix};
        let mut comps = anchor.components();
        match comps.next() {
            Some(Component::Prefix(pr)) => match pr.kind() {
                Prefix::Disk(drive) => {
                    // Peek at the component after the prefix without consuming `comps`.
                    let mut rest = comps.clone();
                    let is_absolute = matches!(rest.next(), Some(Component::RootDir));
                    if is_absolute {
                        anchor.to_path_buf()
                    } else {
                        // Synthesize absolute from drive-relative: "C:\\" + remaining components
                        let mut out = PathBuf::from(format!("{}:\\", drive as char));
                        for c in comps {
                            out.push(c.as_os_str());
                        }
                        out
                    }
                }
                _ => anchor.to_path_buf(),
            },
            _ => anchor.to_path_buf(),
        }
    };

    // Canonicalize anchor (soft) to get absolute, platform-correct base even if parts don't exist.
    let mut base = soft_canonicalize(anchor)?;

    // Early ADS validation on the combined textual intent (defense-in-depth)
    #[cfg(windows)]
    validate_windows_ads_layout(&base.join(input))?;

    // Clamp floor: all paths (including symlink targets) stay within the anchor.
    let anchor_floor = base.clone();

    // Process components directly without a queue - simpler and more efficient
    for comp in input.components() {
        use std::path::Component;
        match comp {
            Component::Normal(seg) => {
                base.push(seg);

                // Only entries that exist and are symlinks need resolving; a failed
                // metadata lookup (e.g. a not-yet-existing path) leaves `base` lexical.
                let is_symlink = std::fs::symlink_metadata(&base)
                    .map(|meta| meta.file_type().is_symlink())
                    .unwrap_or(false);
                if !is_symlink {
                    continue;
                }

                // Use anchored symlink resolver that implements virtual filesystem semantics
                let resolved =
                    crate::symlink::resolve_anchored_symlink_chain(&base, &anchor_floor)?;

                // Final safety check: ensure resolved path is within anchor
                if resolved.starts_with(&anchor_floor) {
                    base = resolved;
                    continue;
                }

                // Virtual filesystem semantics: reinterpret escaped path as relative to anchor.
                // Find common ancestor and preserve relative path structure.
                // Example: resolved = /tmp/xyz/opt/file, anchor = /tmp/xyz/home/jail
                //   Common ancestor: /tmp/xyz
                //   Resolved relative to common: opt/file
                //   Result: /tmp/xyz/home/jail/opt/file
                let common_depth = anchor_floor
                    .components()
                    .zip(resolved.components())
                    .take_while(|(a, r)| a == r)
                    .count();

                // Build clamped path: anchor + (resolved components after common prefix).
                // Prefix and root components are dropped: `PathBuf::push` REPLACES the
                // buffer when handed a prefixed or rooted path, so pushing them (which
                // happens when the two paths share no leading component, e.g. different
                // drives or verbatim vs. non-verbatim prefixes on Windows) would discard
                // the anchor and defeat the clamp.
                base.clone_from(&anchor_floor);
                base.extend(
                    resolved
                        .components()
                        .skip(common_depth)
                        .filter(|c| !matches!(c, Component::Prefix(_) | Component::RootDir)),
                );
            }
            Component::ParentDir => {
                // Clamp ".." to anchor boundary
                if base != anchor_floor && base.starts_with(&anchor_floor) {
                    let _ = base.pop();
                }
            }
            Component::CurDir => {
                // Skip "." - no-op
            }
            Component::RootDir | Component::Prefix(_) => {
                // Strip root/prefix per spec; do not process
            }
        }
    }

    // LATE Windows ADS validation
    #[cfg(windows)]
    validate_windows_ads_layout(&base)?;

    // Ensure Windows extended-length normalization for absolute results
    // We always add \\?\ for robustness, then let dunce decide whether to strip it (if enabled)
    #[cfg(windows)]
    {
        use std::path::{Component, Prefix};
        if let Some(Component::Prefix(pr)) = base.components().next() {
            match pr.kind() {
                Prefix::Verbatim(_) | Prefix::VerbatimDisk(_) | Prefix::VerbatimUNC(_, _) => {}
                Prefix::Disk(_) | Prefix::UNC(_, _) => {
                    base = ensure_windows_extended_prefix(&base);
                }
                Prefix::DeviceNS(_) => {}
            }
        }
    }

    // Optional: dunce feature - simplify UNC paths to legacy format when safe
    // dunce::simplified() intelligently strips \\?\ only when safe (no reserved names,
    // path length ok, no .., etc.). It performs no I/O and handles non-existing paths correctly.
    #[cfg(all(feature = "dunce", windows))]
    {
        base = dunce::simplified(&base).to_path_buf();
    }

    Ok(base)
}
882
#[cfg(test)]
mod tests {
    // Shared helpers for feature-conditional assertions.
    mod test_utils;

    // Suites declared without any feature or platform gate.
    mod api_compatibility;
    mod basic_functionality;
    mod cve_2024_2025_security;
    mod cve_tests;
    mod edge_case_robustness;
    mod edge_cases;
    mod exotic_edge_cases;
    mod format_verification;
    mod optimization;
    mod path_traversal;
    mod platform_specific;
    mod python_inspired_tests;
    mod python_lessons;
    mod security_audit;
    mod short_filename_detection;
    mod std_behavior;
    mod symlink_depth;
    mod symlink_dotdot_resolution_order;
    mod symlink_dotdot_symlink_first;

    // Suites that are only built when the `anchored` feature is enabled.
    #[cfg(feature = "anchored")]
    mod anchored_canonicalize;
    #[cfg(feature = "anchored")]
    mod anchored_relative_symlink_clamping;
    #[cfg(feature = "anchored")]
    mod anchored_security;
    #[cfg(feature = "anchored")]
    mod anchored_symlink_clamping;

    // Windows-only suites.
    #[cfg(windows)]
    mod windows_path_stripping;

    // `dunce` feature suite (additionally requires Windows).
    #[cfg(all(feature = "dunce", windows))]
    mod dunce_feature;
}