soft_canonicalize/
lib.rs

1//! # soft-canonicalize
2//!
3//! **Path canonicalization that works with non-existing paths.**
4//!
5//! Rust implementation inspired by Python 3.6+ `pathlib.Path.resolve(strict=False)`, providing
6//! the same functionality as `std::fs::canonicalize` (Rust's equivalent to Unix `realpath()`)
7//! but extended to handle non-existing paths, with optional features for simplified Windows
8//! output (`dunce`) and virtual filesystem semantics (`anchored`).
9//!
10//! ## Why Use This?
11//!
12//! - **🚀 Works with non-existing paths** - Plan file locations before creating them
13//! - **⚡ Fast** - Optimized performance with minimal allocations and syscalls
14//! - **✅ Compatible** - 100% behavioral match with `std::fs::canonicalize` for existing paths, with optional UNC simplification via `dunce` feature (Windows)
15//! - **🎯 Virtual filesystem support** - Optional `anchored` feature for bounded canonicalization within directory boundaries
16//! - **🔒 Robust** - 495 comprehensive tests covering edge cases and security scenarios
17//! - **🛡️ Safe traversal** - Proper `..` and symlink resolution with cycle detection
18//! - **🌍 Cross-platform** - Windows, macOS, Linux with comprehensive UNC/symlink handling
19//! - **🔧 Zero dependencies** - Optional features may add minimal dependencies
20//!
21//! ## Lexical vs. Filesystem-Based Resolution
22//!
23//! Path resolution libraries fall into two categories:
24//!
25//! **Lexical Resolution** (no I/O):
26//! - **Performance**: Fast - no filesystem access
27//! - **Accuracy**: Incorrect if symlinks are present (doesn't resolve them)
28//! - **Use when**: You're 100% certain no symlinks exist and need maximum performance
29//! - **Examples**: `std::path::absolute`, `normpath::normalize`
30//!
31//! **Filesystem-Based Resolution** (performs I/O):
32//! - **Performance**: Slower - requires filesystem syscalls to resolve symlinks
33//! - **Accuracy**: Correct - follows symlinks to their targets
//! - **Use when**: Safety takes priority over performance, or symlinks may be present
35//! - **Examples**: `std::fs::canonicalize`, `soft_canonicalize`, `dunce::canonicalize`
36//!
37//! **Rule of thumb**: If you cannot guarantee symlinks won't be introduced, or if correctness is critical, use filesystem-based resolution.
38//!
39//! ## Use Cases
40//!
41//! ### Path Comparison
42//!
43//! - **Equality**: Determine if two different path strings point to the same location
44//! - **Containment**: Check if one path is inside another directory
45//!
46//! ### Common Applications
47//!
48//! - **Build Systems**: Resolve output paths during build planning before directories exist
49//! - **Configuration Validation**: Ensure user-provided paths stay within allowed boundaries
50//! - **Deduplication**: Detect when different path strings refer to the same planned location
51//! - **Cross-Platform Normalization**: Handle Windows UNC paths and symlinks consistently
52//!
53//! ## Quick Start
54//!
55//! ```toml
56//! [dependencies]
57//! soft-canonicalize = "0.5"
58//! ```
59//!
60//! ### Basic Example
61//!
62//! ```rust
63//! # #[cfg(windows)]
64//! # {
65//! use soft_canonicalize::soft_canonicalize;
66//!
67//! let non_existing_path = r"C:\Users\user\documents\..\non\existing\config.json";
68//!
69//! // Using Rust's own std canonicalize function:
70//! let result = std::fs::canonicalize(non_existing_path);
71//! assert!(result.is_err());
72//!
73//! // Using our crate's function:
74//! let result = soft_canonicalize(non_existing_path);
75//! assert!(result.is_ok());
76//!
77//! // Shows the UNC path conversion and path normalization
78//! # #[cfg(not(feature = "dunce"))]
79//! assert_eq!(
80//!     result.unwrap().to_string_lossy(),
81//!     r"\\?\C:\Users\user\non\existing\config.json"
82//! );
83//!
84//! // With `dunce` feature enabled, paths are simplified when safe
85//! # #[cfg(feature = "dunce")]
86//! assert_eq!(
87//!     result.unwrap().to_string_lossy(),
88//!     r"C:\Users\user\non\existing\config.json"
89//! );
90//! # }
91//! # Ok::<(), std::io::Error>(())
92//! ```
93//!
94//! ## Optional Features
95//!
96//! ### Anchored Canonicalization (`anchored` feature)
97//!
98//! For **correct symlink resolution within virtual/constrained directory spaces**, use
99//! `anchored_canonicalize`. This function implements true virtual filesystem semantics by
100//! clamping ALL paths (including absolute symlink targets) to the anchor directory:
101//!
102//! ```toml
103//! [dependencies]
104//! soft-canonicalize = { version = "0.5", features = ["anchored"] }
105//! ```
106//!
107//! ```rust
108//! # #[cfg(feature = "anchored")]
109//! use soft_canonicalize::anchored_canonicalize;
110//! # #[cfg(not(feature = "anchored"))]
111//! # use soft_canonicalize::soft_canonicalize;
112//! use std::fs;
113//!
114//! # fn example() -> Result<(), std::io::Error> {
115//! // Set up an anchor/root directory (no need to pre-canonicalize)
116//! let anchor = std::env::temp_dir().join("workspace_root");
117//! fs::create_dir_all(&anchor)?;
118//!
119//! // Canonicalize paths relative to the anchor (anchor is soft-canonicalized internally)
120//! # #[cfg(feature = "anchored")]
121//! let resolved_path = anchored_canonicalize(&anchor, "../../../etc/passwd")?;
122//! # #[cfg(not(feature = "anchored"))]
123//! # { let _ = (&anchor, "../../../etc/passwd"); }
124//! // Result: /tmp/workspace_root/etc/passwd (lexical .. clamped to anchor)
125//!
126//! // Absolute symlinks are also clamped to the anchor
127//! // If there's a symlink: workspace_root/config -> /etc/config
128//! // It resolves to: workspace_root/etc/config (clamped to anchor)
129//! # #[cfg(feature = "anchored")]
130//! let symlink_path = anchored_canonicalize(&anchor, "config")?;
131//! # #[cfg(not(feature = "anchored"))]
132//! # { let _ = "config"; }
133//! // Safe: always stays within workspace_root, even if symlink points to /etc/config
134//! # Ok(())
135//! # }
136//! ```
137//!
138//! **Key features:**
139//! - Virtual filesystem semantics: All absolute paths (including symlink targets) are clamped to anchor
140//! - Anchor-relative canonicalization: Resolves paths relative to a specific anchor directory
141//! - Complete symlink clamping: Follows symlink chains with clamping at each step
142//! - Component-by-component: Processes path components in proper order
143//! - Absolute results: Always returns absolute canonical paths within the anchor boundary
144//!
145//! **For a complete multi-tenant security example**, run:
146//! ```bash
147//! cargo run --example virtual_filesystem_demo --features anchored
148//! ```
149//!
150//! ### Simplified Path Output (`dunce` feature, Windows-only)
151//!
152//! By default, `soft_canonicalize` returns Windows paths in extended-length UNC format
153//! (`\\?\C:\foo`) for maximum robustness and compatibility with long paths, reserved names,
154//! and other Windows filesystem edge cases.
155//!
156//! If you need simplified paths (`C:\foo`) for compatibility with legacy applications or
157//! user-facing output, enable the **`dunce` feature**:
158//!
159//! ```toml
160//! [dependencies]
161//! soft-canonicalize = { version = "0.5", features = ["dunce"] }
162//! ```
163//!
164//! **Example:**
165//!
166//! ```rust
167//! use soft_canonicalize::soft_canonicalize;
168//! # fn example() -> Result<(), std::io::Error> {
169//! # #[cfg(windows)]
170//! # {
171//! let path = soft_canonicalize(r"C:\Users\user\documents\..\config.json")?;
172//!
173//! // Without dunce feature (default):
174//! // Returns: \\?\C:\Users\user\config.json (extended-length UNC)
175//!
176//! // With dunce feature enabled:
177//! // Returns: C:\Users\user\config.json (simplified when safe)
178//! # }
179//! # Ok(())
180//! # }
181//! ```
182//!
183//! **When to use:**
184//! - ✅ Legacy applications that don't support UNC paths
185//! - ✅ User-facing output requiring familiar path format
186//! - ✅ Tools expecting traditional Windows path format
187//!
188//! **How it works:**
189//!
190//! The [dunce](https://crates.io/crates/dunce) crate intelligently simplifies Windows UNC paths
191//! (`\\?\C:\foo` → `C:\foo`) **only when safe**:
192//! - Automatically keeps UNC for paths >260 chars
193//! - Automatically keeps UNC for reserved names (CON, PRN, NUL, COM1-9, LPT1-9)
194//! - Automatically keeps UNC for paths with trailing spaces/dots
195//! - Automatically keeps UNC for paths containing `..` (literal interpretation)
196//!
197//! ## When Paths Must Exist: `proc-canonicalize`
198//!
199//! Since v0.5.0, `soft_canonicalize` uses [`proc-canonicalize`](https://crates.io/crates/proc-canonicalize)
200//! by default for existing-path canonicalization instead of `std::fs::canonicalize`. This fixes a
201//! critical issue with Linux namespace boundaries.
202//!
203//! **The Problem**: On Linux, `std::fs::canonicalize` resolves "magic symlinks" like `/proc/PID/root`
204//! to their targets, losing the namespace boundary:
205//!
206//! ```rust
207//! # #[cfg(all(target_os = "linux", feature = "proc-canonicalize"))]
208//! # fn main() -> std::io::Result<()> {
209//! // /proc/self/root is a "magic symlink" pointing to the current process's root filesystem
210//! // std::fs::canonicalize incorrectly resolves it to "/"
211//! let std_result = std::fs::canonicalize("/proc/self/root")?;
212//! assert_eq!(std_result.to_string_lossy(), "/"); // Wrong! Namespace boundary lost
213//!
214//! // proc_canonicalize preserves the namespace boundary
215//! let proc_result = proc_canonicalize::canonicalize("/proc/self/root")?;
216//! assert_eq!(proc_result.to_string_lossy(), "/proc/self/root"); // Correct!
217//! # Ok(())
218//! # }
219//! # #[cfg(not(all(target_os = "linux", feature = "proc-canonicalize")))]
220//! # fn main() {}
221//! ```
222//!
//! **Recommendation**: If you need to canonicalize paths that **must exist** (and would previously
//! have used `std::fs::canonicalize`), use `proc_canonicalize::canonicalize` instead for correct
//! Linux namespace handling:
226//!
227//! ```toml
228//! [dependencies]
229//! proc-canonicalize = "0.0"
230//! ```
231//!
232//! ## Security & CVE Coverage
233//!
234//! Security does not depend on enabling features. The core API is secure-by-default; the optional
235//! `anchored` feature is a convenience for virtual roots. We test all modes (no features;
236//! `--features anchored`; `--features anchored,dunce`).
237//!
238//! **Built-in protections include:**
239//! - **NTFS Alternate Data Stream (ADS) validation** - Blocks malicious stream placements and traversal attempts
240//! - **Symlink cycle detection** - Bounded depth tracking prevents infinite loops
241//! - **Path traversal clamping** - Never ascends past root/share/device boundaries
242//! - **Null byte rejection** - Early validation prevents injection attacks
243//! - **UNC/device semantics** - Preserves Windows extended-length and device namespace integrity
244//! - **TOCTOU race resistance** - Tested against time-of-check-time-of-use attacks
245//!
246//! See [`docs/SECURITY.md`](https://github.com/DK26/soft-canonicalize-rs/blob/dev/docs/SECURITY.md)
247//! for detailed analysis, attack scenarios, and test references.
248//!
249//! ## Cross-Platform Notes
250//!
251//! - Windows: returns extended-length verbatim paths for absolute results (`\\?\C:\…`, `\\?\UNC\…`)
252//!   - With `dunce` feature: returns simplified paths (`C:\…`) when safe
253//! - Unix-like systems: standard absolute and relative path semantics
254//! - UNC floors and device namespaces are preserved and respected
255//!
256//! ## Testing
257//!
258//! 495 tests including:
259//! - std::fs::canonicalize compatibility tests (existing paths)
260//! - Path traversal and robustness tests
261//! - Python pathlib-inspired behavior checks
262//! - Platform-specific cases (Windows/macOS/Linux)
263//! - Symlink semantics and cycle detection
264//! - Windows-specific UNC, 8.3, and ADS validation
265//! - Anchored canonicalization tests (with `anchored` feature)
266//!
267//! ## Known Limitation (Windows 8.3)
268//!
269//! On Windows, for non-existing paths we cannot determine equivalence between a short (8.3)
270//! name and its long form. Existing paths are canonicalized to the same result.
271//!
272//! ```rust
273//! use soft_canonicalize::soft_canonicalize;
274//! # fn example() -> Result<(), std::io::Error> {
275//! # #[cfg(windows)]
276//! # {
277//! let short_form = soft_canonicalize("C:/PROGRA~1/MyApp/config.json")?;
278//! let long_form  = soft_canonicalize("C:/Program Files/MyApp/config.json")?;
279//! assert_ne!(short_form, long_form); // for non-existing suffixes
280//! # }
281//! # Ok(())
282//! # }
283//! ```
284//!
285//! ## How It Works
286//!
287//! For those interested in the implementation details, here's how `soft_canonicalize` processes paths:
288//!
289//! 1. Input validation (empty path, platform pre-checks)
290//! 2. Convert to absolute path (preserving drive/root semantics)
291//! 3. Fast-path: try `fs::canonicalize` on the original absolute path
292//! 4. Lexically normalize `.` and `..` (fast-path optimization for whole-path existence check)
293//! 5. Fast-path: try `fs::canonicalize` on the normalized path when different
294//! 6. Validate null bytes (platform-specific)
295//! 7. Discover deepest existing prefix with **symlink-first** semantics: resolve symlinks incrementally, then process `.` and `..` relative to resolved targets
296//! 8. Optionally canonicalize the anchor (if symlinks seen) and rebuild
297//! 9. Append non-existing suffix lexically, then normalize if needed
298//! 10. Windows: ensure extended-length prefix for absolute paths
299//! 11. Optional: simplify Windows paths when `dunce` feature enabled
300
301mod error;
302mod normalize;
303mod prefix;
304mod symlink;
305#[cfg(windows)]
306mod windows;
307
308pub use error::{IoErrorPathExt, SoftCanonicalizeError};
309pub use symlink::MAX_SYMLINK_DEPTH;
310
311use crate::error::error_with_path;
312use crate::normalize::simple_normalize_path;
313use crate::prefix::compute_existing_prefix;
314#[cfg(windows)]
315use crate::windows::{
316    ensure_windows_extended_prefix, has_windows_short_component, is_incomplete_unc,
317    validate_windows_ads_layout,
318};
319
320use std::io;
321use std::path::{Path, PathBuf};
322
323// Canonicalization backend selection (priority order):
324// 1. proc-canonicalize feature (default): fixes Linux /proc/PID/root magic symlinks,
325//    and delegates to dunce when both features are enabled
326// 2. dunce feature on Windows (without proc-canonicalize): uses dunce::canonicalize
327// 3. fallback: uses std::fs::canonicalize
328#[cfg(feature = "proc-canonicalize")]
329use proc_canonicalize::canonicalize as fs_canonicalize;
330
331#[cfg(all(not(feature = "proc-canonicalize"), feature = "dunce", windows))]
332use dunce::canonicalize as fs_canonicalize;
333
334#[cfg(all(
335    not(feature = "proc-canonicalize"),
336    not(all(feature = "dunce", windows))
337))]
338use std::fs::canonicalize as fs_canonicalize;
339
/// Reports whether the platform-native encoding of `p` contains a NUL unit.
///
/// - Unix: scans the raw bytes of the path for a `0` byte.
/// - Windows: scans the UTF-16 code units of the path for a `0` unit.
/// - Any other target: no check is performed and `false` is returned.
#[inline]
fn path_contains_nul(p: &Path) -> bool {
    // Exactly one of the three `scan` helpers below survives cfg-stripping.
    #[cfg(unix)]
    fn scan(raw: &std::ffi::OsStr) -> bool {
        use std::os::unix::ffi::OsStrExt;
        raw.as_bytes().iter().any(|&byte| byte == 0)
    }

    #[cfg(windows)]
    fn scan(raw: &std::ffi::OsStr) -> bool {
        use std::os::windows::ffi::OsStrExt;
        raw.encode_wide().any(|unit| unit == 0)
    }

    #[cfg(not(any(unix, windows)))]
    fn scan(_raw: &std::ffi::OsStr) -> bool {
        // No native encoding is inspected on other platforms.
        false
    }

    scan(p.as_os_str())
}
358
359#[inline]
360fn reject_nul_bytes(p: &Path) -> io::Result<()> {
361    if path_contains_nul(p) {
362        return Err(error_with_path(
363            io::ErrorKind::InvalidInput,
364            p,
365            "path contains null byte",
366        ));
367    }
368    Ok(())
369}
370
/// Performs "soft" canonicalization on a path.
///
/// Unlike `std::fs::canonicalize()`, this function works with non-existent paths by:
/// 1. Finding the deepest existing ancestor directory
/// 2. Canonicalizing that existing part (resolving symlinks, normalizing case, etc.)
/// 3. Appending the non-existing path components to the canonicalized base
///
/// This provides canonicalization benefits (symlink resolution, path normalization)
/// without requiring the entire path to exist.
///
/// Relative inputs are resolved against `std::env::current_dir()`. If the whole path
/// already exists, the result of the selected canonicalization backend is returned directly.
///
/// # Output Format
///
/// **Without `dunce` feature (default):**
/// - Windows: Returns extended-length UNC paths (`\\?\C:\foo`) for maximum robustness
/// - Unix: Returns standard absolute paths (`/foo`)
///
/// **With `dunce` feature enabled:**
/// - Windows: Returns simplified paths (`C:\foo`) when safe to do so
/// - Unix: Returns standard absolute paths (`/foo`) - no change
///
/// See the [module documentation](crate#optional-features) for details on the `dunce` feature.
///
/// # Errors
///
/// - `io::ErrorKind::NotFound` if `path` is empty.
/// - `io::ErrorKind::InvalidInput` on Windows if `path` is an incomplete UNC root
///   (`\\server` without a share).
/// - Any error from `std::env::current_dir()` when `path` is relative.
/// - On Windows, any error reported by the ADS layout validation.
/// - `io::ErrorKind::InvalidInput` or `io::ErrorKind::PermissionDenied` reported by the
///   canonicalization backend during the fast-path attempts is returned unchanged; other
///   backend errors are treated as "path does not fully exist" and processing continues.
/// - `io::ErrorKind::InvalidInput` if `path` contains a NUL byte (checked after the fast paths).
/// - Any error produced while discovering the deepest existing prefix is propagated unchanged.
#[must_use = "this function returns a new PathBuf without modifying the input"]
#[doc(alias = "realpath")]
#[doc(alias = "canonicalize")]
#[doc(alias = "resolve")]
#[doc(alias = "absolute")]
pub fn soft_canonicalize(path: impl AsRef<Path>) -> io::Result<PathBuf> {
    let path = path.as_ref();

    // Stage 0: guard-rail — handle empty path early (aligns with std::fs::canonicalize)
    if path.as_os_str().is_empty() {
        return Err(error_with_path(
            io::ErrorKind::NotFound,
            path,
            "The system cannot find the path specified.",
        ));
    }

    // Windows-only: explicit guard — reject incomplete UNC roots (\\server without a share)
    #[cfg(windows)]
    {
        if is_incomplete_unc(path) {
            return Err(error_with_path(
                io::ErrorKind::InvalidInput,
                path,
                "invalid UNC path: missing share",
            ));
        }
    }

    // Stage 1: convert to absolute path (preserves drive/root semantics).
    // Relative inputs are joined onto the current working directory; a failure to
    // obtain the working directory is returned to the caller.
    let absolute_path = if path.is_absolute() {
        path.to_path_buf()
    } else {
        std::env::current_dir()?.join(path)
    };

    // Windows-only EARLY ADS validation (before lexical normalization)
    #[cfg(windows)]
    validate_windows_ads_layout(&absolute_path)?;

    // Stage 1.5: fast-path — attempt the selected canonicalize backend (`fs_canonicalize`)
    // on the ORIGINAL absolute path first. Success means the whole path exists and we are done.
    // InvalidInput / PermissionDenied are surfaced as-is; every other error kind falls through.
    match fs_canonicalize(&absolute_path) {
        Ok(p) => return Ok(p),
        Err(e) => match e.kind() {
            io::ErrorKind::NotFound => { /* continue to boundary detection */ }
            io::ErrorKind::InvalidInput | io::ErrorKind::PermissionDenied => return Err(e),
            _ => { /* continue to optimized boundary detection */ }
        },
    }

    // Stage 2: pre-normalize lexically (resolve . and .. without touching the filesystem)
    let normalized_path = simple_normalize_path(&absolute_path);

    // Windows-only LATE ADS validation (defense in depth after normalization)
    #[cfg(windows)]
    validate_windows_ads_layout(&normalized_path)?;

    // Stage 3: fast-path — try fs::canonicalize on the lexically-normalized path as well.
    // Skipped when normalization changed nothing, since Stage 1.5 already tried that exact path.
    if normalized_path != absolute_path {
        match fs_canonicalize(&normalized_path) {
            Ok(p) => return Ok(p),
            Err(e) => match e.kind() {
                io::ErrorKind::NotFound => { /* fall through to optimized boundary detection */ }
                io::ErrorKind::InvalidInput | io::ErrorKind::PermissionDenied => return Err(e),
                _ => { /* fall through to optimized boundary detection */ }
            },
        }
    }
    // At this point: path doesn't fully exist or canonicalize returned a recoverable error — continue.

    // Stage 3.1: sanity check — validate no embedded NUL bytes (platform-specific).
    // Note: the check is applied to the caller's original `path`, not to `absolute_path`.
    reject_nul_bytes(path)?;

    // Stage 4: collect path components efficiently (root/prefix vs normal names).
    // `root_prefix` accumulates the prefix/root part; `components` holds everything after it.
    // The split is taken from the un-normalized `absolute_path`, so `.` and `..` are still present.
    let mut components = Vec::new();
    let mut root_prefix = PathBuf::new();

    for component in absolute_path.components() {
        match component {
            std::path::Component::RootDir | std::path::Component::Prefix(_) => {
                root_prefix.push(component.as_os_str());
            }
            std::path::Component::Normal(name) => {
                components.push(name.to_os_string());
            }
            // `.` and `..` are kept as literal entries (each stored as an owned OsString) so that
            // prefix discovery and the final lexical normalization can see and process them.
            std::path::Component::CurDir => components.push(std::ffi::OsString::from(".")),
            std::path::Component::ParentDir => components.push(std::ffi::OsString::from("..")),
        }
    }

    // Stage 5: discover the deepest existing prefix and resolve symlinks inline as encountered.
    // Returns the resolved prefix, how many entries of `components` it consumed, and whether
    // any symlink was seen along the way.
    let (existing_prefix, existing_count, symlink_seen) =
        compute_existing_prefix(&root_prefix, &components)?;

    // Stage 6: Build the base result. Only canonicalize the deepest existing ancestor
    // when needed (e.g., symlink encountered).
    let mut base = existing_prefix;
    if existing_count > 0 && symlink_seen {
        // Identify deepest existing anchor (defensive in case base points at a symlink whose target doesn't exist)
        let mut anchor = base.as_path();
        // Walk upward until an existing ancestor is found or there is no parent left.
        while !anchor.exists() {
            if let Some(p) = anchor.parent() {
                anchor = p;
            } else {
                break;
            }
        }
        if anchor.exists() {
            // Best effort: if canonicalizing the anchor fails, `base` is left as computed above.
            if let Ok(canon_anchor) = fs_canonicalize(anchor) {
                // Rebuild base as: canonicalized anchor + relative suffix
                let suffix = base.strip_prefix(anchor).ok();
                let mut rebuilt = canon_anchor;
                if let Some(suf) = suffix {
                    rebuilt.push(suf);
                }
                base = rebuilt;
            }
        }
    }

    // Windows-only: Expand short-name component if no symlink encountered but base has 8.3 component
    #[cfg(windows)]
    {
        if !symlink_seen && existing_count > 0 && has_windows_short_component(&base) {
            // Best effort: on failure the un-expanded base is kept.
            if let Ok(canon_base) = fs_canonicalize(&base) {
                base = canon_base;
            }
        }
    }

    let mut result = base;

    // Stage 7: append the non-existing suffix components (purely lexical).
    // While appending, remember whether any of them is `.` or `..`; that flag decides whether
    // the non-Windows branch below needs to run a lexical normalization pass at all.
    let mut suffix_has_dot_or_dotdot = false;
    for component in components.iter().skip(existing_count) {
        // Compare the OsStr directly against string literals; stop checking once the flag is set.
        if !suffix_has_dot_or_dotdot {
            let comp_str = component.as_os_str();
            if comp_str == "." || comp_str == ".." {
                suffix_has_dot_or_dotdot = true;
            }
        }
        result.push(component);
    }

    // After we have a fully-resolved base, normalize lexically.
    // Note: When dunce feature is enabled AND path is verbatim, skip normalization
    // so dunce can see the raw structure and make correct safety decisions
    #[cfg(windows)]
    {
        #[cfg(feature = "dunce")]
        {
            use std::path::{Component, Prefix};
            // Normalize only when the result does NOT start with a verbatim (`\\?\…`) prefix.
            let should_normalize = !matches!(
                result.components().next(),
                Some(Component::Prefix(p)) if matches!(
                    p.kind(),
                    Prefix::Verbatim(_) | Prefix::VerbatimDisk(_) | Prefix::VerbatimUNC(_, _)
                )
            );
            if should_normalize {
                result = simple_normalize_path(&result);
            }
        }
        #[cfg(not(feature = "dunce"))]
        {
            // Without dunce, Windows results are always normalized.
            result = simple_normalize_path(&result);
        }
    }
    #[cfg(not(windows))]
    {
        // Non-Windows: normalization is only needed when the appended suffix had `.` or `..`.
        if suffix_has_dot_or_dotdot {
            result = simple_normalize_path(&result);
        }
    }

    // Stage 8 (Windows): ensure extended-length prefix for absolute paths
    // We always add \\?\ for robustness, then let dunce decide whether to strip it (if enabled)
    #[cfg(windows)]
    {
        use std::path::{Component, Prefix};
        if let Some(Component::Prefix(pr)) = result.components().next() {
            match pr.kind() {
                Prefix::Verbatim(_) | Prefix::VerbatimDisk(_) | Prefix::VerbatimUNC(_, _) => { /* already extended */
                }
                Prefix::Disk(_) | Prefix::UNC(_, _) => {
                    result = ensure_windows_extended_prefix(&result);
                }
                Prefix::DeviceNS(_) => { /* leave as-is */ }
            }
        }
    }

    // Stage 9 (Optional): dunce feature - simplify paths to legacy format when safe
    // dunce::simplified() intelligently strips \\?\ only when safe (no reserved names,
    // path length ok, no .., etc.). It performs no I/O and handles non-existing paths correctly.
    #[cfg(all(feature = "dunce", windows))]
    {
        result = dunce::simplified(&result).to_path_buf();
    }

    Ok(result)
}
596
597/// Canonicalize a user-provided path relative to an anchor directory, with virtual filesystem semantics.
598///
599/// This function resolves paths **as if rooted under a given anchor**, performing canonical path
600/// resolution relative to the anchor instead of the current working directory. All paths, including
601/// absolute symlink targets, are clamped to the anchor, implementing true virtual filesystem behavior.
602///
603/// ## Behavior Overview
604/// - Treats `input` as if rooted under `anchor` (strips root/prefix markers from `input`)
605/// - Expands symlinks as encountered (component-by-component), applying `..` after expansion
606/// - **Clamps ALL paths to the `anchor` boundary**, including:
607///   - Lexical `..` traversal in user input
608///   - **All absolute symlink targets** (both within and outside anchor - see below)
609///   - Chained symlinks with mixed absolute and relative targets
610/// - Bounded symlink following with cycle-defense, consistent with `MAX_SYMLINK_DEPTH`
611/// - Mirrors input validations from `soft_canonicalize` (null-byte checks, Windows ADS layout)
612///
613/// ## Absolute Symlink Clamping (Critical Behavior)
614///
615/// When a symlink points to an absolute path, it is **always clamped to the anchor**,
616/// implementing true virtual filesystem semantics. This happens in two cases:
617///
618/// **Case 1: Symlink within anchor** (host-style path)
619/// - Example: Symlink `/tmp/anchor/link` → `/tmp/anchor/docs/file`
620/// - The target already expresses the full host path including the anchor
621/// - Process: Strip anchor prefix, then rejoin to anchor
622/// - Result: `/tmp/anchor/docs/file` (stays within anchor)
623///
624/// **Case 2: Symlink outside anchor** (virtual-style path)
625/// - Example: Symlink `/tmp/anchor/link` → `/etc/passwd`
626/// - The target is an absolute path outside the anchor
627/// - Process: Strip root prefix (`/`), then join to anchor
628/// - Result: `/tmp/anchor/etc/passwd` (clamped to anchor)
629///
630/// In both cases, the anchor acts as a **virtual root** (`/`), similar to chroot behavior.
631/// This ensures symlinks cannot escape the anchor boundary, regardless of where they point.
632///
633/// ## Features
634/// - **Anchored resolution**: Interprets paths relative to a specific anchor directory
635/// - **Virtual filesystem semantics**: Clamps all absolute paths (including symlink targets) to anchor
636/// - **Symlink canonicalization**: Follows symlink chains with clamping at each step
637/// - **Input validation**: Rejects null bytes, malformed UNC paths, and empty paths
638/// - **Cycle detection**: Prevents infinite symlink loops with configurable depth limits
639///
640/// ## Use Cases
641/// - **Virtual filesystem implementations**: Provides correct symlink resolution behavior
642///   when operating within virtual/constrained directory spaces
643/// - **Containerized environments**: Ensures symlinks resolve properly relative to a virtual root
644/// - **Chroot-like scenarios**: Maintains correct path semantics within bounded directory trees
645/// - **Build systems**: Resolving paths relative to project roots with proper symlink handling
646/// - **Applications needing anchor-relative interpretation**: Consistent path resolution
647///   relative to a base directory while preserving symlink semantics
648/// - **Path sandboxing**: Building higher-level path processing APIs with controlled resolution scope
649///
650/// ## Output Format
651///
652/// The output format follows the same rules as [`soft_canonicalize`]:
653/// - **Without `dunce` feature (default)**: Windows returns extended-length UNC paths (`\\?\C:\foo`)
654/// - **With `dunce` feature enabled**: Windows returns simplified paths (`C:\foo`) when safe
655/// - Unix systems always return standard absolute paths
656///
657/// ## Notes
658/// - The `anchor` is canonicalized (soft) first; the result is absolute
659/// - For fully-existing final paths, this typically matches `std::fs::canonicalize` of the
660///   resolved path; however, semantics differ because `input` is interpreted relative to `anchor`
661/// - Enable with `--features anchored` (optional feature to keep core library lightweight)
662///
663/// ## Example
664/// ```
665/// use soft_canonicalize::{anchored_canonicalize, soft_canonicalize};
666/// use std::fs;
667///
668/// # fn demo() -> Result<(), std::io::Error> {
669/// let anchor = std::env::temp_dir().join("sc_anchor_demo").join("root");
670/// fs::create_dir_all(&anchor)?;
671///
672/// let base = soft_canonicalize(&anchor)?;
673///
674/// // Absolute input paths are clamped to anchor
675/// let out = anchored_canonicalize(&base, "/etc/passwd")?;
676/// assert_eq!(out, base.join("etc").join("passwd"));
677///
678/// // Lexical .. traversal is also clamped
679/// let out2 = anchored_canonicalize(&base, "../../../etc/passwd")?;
680/// assert_eq!(out2, base.join("etc").join("passwd"));
681/// # Ok(())
682/// # }
683/// # demo().unwrap();
684/// ```
685///
686/// ## Symlink Clamping Example
687/// ```
688/// # #[cfg(unix)]
689/// # fn demo() -> Result<(), std::io::Error> {
690/// use soft_canonicalize::{anchored_canonicalize, soft_canonicalize};
691/// use std::os::unix::fs::symlink;
692/// use std::fs;
693///
694/// let anchor = std::env::temp_dir().join("sc_symlink_demo2").join("root");
695/// fs::create_dir_all(&anchor)?;
696/// let base = soft_canonicalize(&anchor)?;
697///
698/// // Create a symlink pointing to absolute path outside anchor
699/// let external_path = std::env::temp_dir().join("external_data2");
700/// fs::create_dir_all(&external_path)?;
701/// let link_path = base.join("mylink");
702/// let _ = fs::remove_file(&link_path); // Clean up if exists
703/// symlink(&external_path, &link_path)?;
704///
705/// // The absolute symlink target is CLAMPED to the anchor
706/// let result = anchored_canonicalize(&base, "mylink")?;
707/// // Result stays within anchor (virtual filesystem semantics)
708/// assert!(result.starts_with(&base));
709/// # Ok(())
710/// # }
711/// # #[cfg(unix)]
712/// # demo().unwrap();
713/// ```
#[must_use = "this function returns a new PathBuf without modifying the input"]
#[doc(alias = "chroot")]
#[doc(alias = "jail")]
#[doc(alias = "sandbox")]
#[doc(alias = "virtual_root")]
#[cfg(feature = "anchored")]
#[cfg_attr(docsrs, doc(cfg(feature = "anchored")))]
pub fn anchored_canonicalize(
    anchor: impl AsRef<Path>,
    input: impl AsRef<Path>,
) -> io::Result<PathBuf> {
    let anchor = anchor.as_ref();
    let input = input.as_ref();

    // Basic input validation: an empty anchor cannot serve as a virtual root.
    if anchor.as_os_str().is_empty() {
        return Err(error_with_path(
            io::ErrorKind::NotFound,
            anchor,
            "anchor path is empty",
        ));
    }

    // Reject NULs (platform-specific) in both the anchor and the input.
    reject_nul_bytes(anchor)?;
    reject_nul_bytes(input)?;

    // Windows-only: reject incomplete UNC anchors early
    #[cfg(windows)]
    {
        if is_incomplete_unc(anchor) {
            return Err(error_with_path(
                io::ErrorKind::InvalidInput,
                anchor,
                "invalid UNC path: missing share",
            ));
        }
    }

    // On Windows, treat drive-relative anchors (e.g., "C:dir") as absolute anchors ("C:\\dir").
    // Anchors act as virtual roots and should not depend on the process's per-drive cwd.
    #[cfg(windows)]
    let anchor = {
        use std::path::{Component, Prefix};
        let mut comps = anchor.components();
        match comps.next() {
            Some(Component::Prefix(pr)) => match pr.kind() {
                Prefix::Disk(drive) => {
                    // Peek (on a clone) at the component following the drive prefix so that
                    // `comps` still yields every post-prefix component below.
                    let mut rest = comps.clone();
                    let is_absolute = matches!(rest.next(), Some(Component::RootDir));
                    if is_absolute {
                        anchor.to_path_buf()
                    } else {
                        // Synthesize absolute from drive-relative: "C:\\" + remaining components
                        let mut out = PathBuf::from(format!("{}:\\", drive as char));
                        for c in comps {
                            out.push(c.as_os_str());
                        }
                        out
                    }
                }
                _ => anchor.to_path_buf(),
            },
            _ => anchor.to_path_buf(),
        }
    };

    // Canonicalize anchor (soft) to get absolute, platform-correct base even if parts don't exist.
    let mut base = soft_canonicalize(anchor)?;

    // Early ADS validation on the combined textual intent (defense-in-depth)
    #[cfg(windows)]
    validate_windows_ads_layout(&base.join(input))?;

    // Clamp floor: all paths (including symlink targets) stay within the anchor.
    let anchor_floor = base.clone();

    // Process components directly without a queue - simpler and more efficient
    for comp in input.components() {
        use std::path::Component;
        match comp {
            Component::Normal(seg) => {
                base.push(seg);

                // Resolve symlink chain at `base` using anchor-aware resolver.
                // A metadata failure (e.g. the path does not exist yet) simply means there is
                // nothing to resolve for this component.
                if let Ok(meta) = std::fs::symlink_metadata(&base) {
                    if meta.file_type().is_symlink() {
                        // Use anchored symlink resolver that implements virtual filesystem semantics
                        let resolved =
                            crate::symlink::resolve_anchored_symlink_chain(&base, &anchor_floor)?;

                        if resolved.starts_with(&anchor_floor) {
                            base = resolved;
                        } else {
                            // Final safety check failed: the resolved path escaped the anchor.
                            // Virtual filesystem semantics: reinterpret the escaped path as
                            // relative to the anchor, preserving the structure below the
                            // longest common ancestor.
                            // Example: resolved = /tmp/xyz/opt/file, anchor = /tmp/xyz/home/jail
                            // Common ancestor: /tmp/xyz
                            // Resolved relative to common: opt/file
                            // Result: /tmp/xyz/home/jail/opt/file
                            let common_depth = anchor_floor
                                .components()
                                .zip(resolved.components())
                                .take_while(|(a, r)| a == r)
                                .count();

                            // Build clamped path: anchor + (resolved segments after common prefix).
                            // Only plain name segments are appended. If the two paths share no
                            // leading component (e.g. a different drive or UNC share on Windows),
                            // the remainder starts with a prefix/root component, and
                            // `PathBuf::push` would *replace* the anchor with it, defeating the
                            // clamp. `..`/`.` are likewise never allowed to leak into the result.
                            let mut clamped = anchor_floor.clone();
                            clamped.extend(resolved.components().skip(common_depth).filter_map(
                                |part| match part {
                                    Component::Normal(name) => Some(name),
                                    Component::Prefix(_)
                                    | Component::RootDir
                                    | Component::CurDir
                                    | Component::ParentDir => None,
                                },
                            ));
                            base = clamped;
                        }
                    }
                }
            }
            Component::ParentDir => {
                // Clamp ".." to anchor boundary: never pop at (or outside) the floor.
                if base != anchor_floor && base.starts_with(&anchor_floor) {
                    let _ = base.pop();
                }
            }
            Component::CurDir => {
                // Skip "." - no-op
            }
            Component::RootDir | Component::Prefix(_) => {
                // Strip root/prefix per spec; do not process.
                // An absolute input is thereby reinterpreted relative to the anchor.
            }
        }
    }

    // LATE Windows ADS validation
    #[cfg(windows)]
    validate_windows_ads_layout(&base)?;

    // Ensure Windows extended-length normalization for absolute results
    // We always add \\?\ for robustness, then let dunce decide whether to strip it (if enabled)
    #[cfg(windows)]
    {
        use std::path::{Component, Prefix};
        if let Some(Component::Prefix(pr)) = base.components().next() {
            match pr.kind() {
                Prefix::Verbatim(_) | Prefix::VerbatimDisk(_) | Prefix::VerbatimUNC(_, _) => {}
                Prefix::Disk(_) | Prefix::UNC(_, _) => {
                    base = ensure_windows_extended_prefix(&base);
                }
                Prefix::DeviceNS(_) => {}
            }
        }
    }

    // Optional: dunce feature - simplify UNC paths to legacy format when safe
    // dunce::simplified() intelligently strips \\?\ only when safe (no reserved names,
    // path length ok, no .., etc.). It performs no I/O and handles non-existing paths correctly.
    #[cfg(all(feature = "dunce", windows))]
    {
        base = dunce::simplified(&base).to_path_buf();
    }

    Ok(base)
}
882
// Unit-test module tree, compiled only for test builds.
// Every entry below is an out-of-line module declaration (`mod name;`), so the test
// bodies live in separate files rather than in this one.
#[cfg(test)]
mod tests {
    // Test utilities for feature-conditional assertions
    mod test_utils;

    // Suites gated on the `anchored` feature.
    #[cfg(feature = "anchored")]
    mod anchored_canonicalize;
    #[cfg(feature = "anchored")]
    mod anchored_relative_symlink_clamping;
    #[cfg(feature = "anchored")]
    mod anchored_security;
    #[cfg(feature = "anchored")]
    mod anchored_symlink_clamping;

    // Suites compiled for every test build (no feature or platform gate).
    mod api_compatibility;
    mod basic_functionality;
    mod cve_2024_2025_security;
    mod cve_tests;
    mod edge_case_robustness;
    mod edge_cases;
    mod exotic_edge_cases;
    mod format_verification;
    mod optimization;
    mod path_traversal;
    mod platform_specific;
    mod python_inspired_tests;
    mod python_lessons;
    mod security_audit;
    mod short_filename_detection;
    mod std_behavior;
    mod symlink_depth;
    mod symlink_dotdot_resolution_order;
    mod symlink_dotdot_symlink_first;

    // Compiled only when targeting Windows.
    #[cfg(windows)]
    mod windows_path_stripping;

    // dunce feature test suite (Windows-only)
    #[cfg(all(feature = "dunce", windows))]
    mod dunce_feature;
}