Skip to main content

rskit_fs/
path.rs

1//! Safe path helpers.
2
3use std::ffi::OsString;
4use std::fmt;
5use std::io::ErrorKind;
6use std::path::{Component, Path, PathBuf};
7
8use rskit_errors::{AppError, AppResult, ErrorCode};
9
/// Reason a path was rejected as unsafe to resolve beneath an expected root.
///
/// Marked `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SafePathError {
    /// The path is absolute; root-relative operations only accept relative paths.
    Absolute,
    /// The path contains a `..` (parent-directory) component.
    ParentDir,
    /// The path carries a platform-specific prefix component.
    Prefix,
}
21
22impl fmt::Display for SafePathError {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        match self {
25            Self::Absolute => f.write_str("path must be relative, not absolute"),
26            Self::ParentDir => f.write_str("path must not contain '..' segments"),
27            Self::Prefix => f.write_str("path must not contain a platform path prefix"),
28        }
29    }
30}
31
32impl std::error::Error for SafePathError {}
33
34/// Validate that `path` is safe to join under a caller-owned root directory.
35pub fn validate_relative_path(path: &Path) -> Result<(), SafePathError> {
36    for component in path.components() {
37        match component {
38            Component::RootDir => return Err(SafePathError::Absolute),
39            Component::ParentDir => return Err(SafePathError::ParentDir),
40            #[cfg(windows)]
41            Component::Prefix(_) => return Err(SafePathError::Prefix),
42            #[cfg(not(windows))]
43            Component::Prefix(_) | Component::CurDir | Component::Normal(_) => {}
44            #[cfg(windows)]
45            Component::CurDir | Component::Normal(_) => {}
46        }
47    }
48    Ok(())
49}
50
51/// Join a caller-owned root with a validated relative path.
52pub fn safe_join(root: &Path, rel_path: impl AsRef<Path>) -> Result<PathBuf, SafePathError> {
53    let rel_path = rel_path.as_ref();
54    validate_relative_path(rel_path)?;
55    Ok(root.join(rel_path))
56}
57
58/// Return an absolute path without requiring the path to exist.
59pub fn absolute(path: &Path) -> AppResult<PathBuf> {
60    if path.is_absolute() {
61        return Ok(path.to_path_buf());
62    }
63    std::env::current_dir()
64        .map(|cwd| cwd.join(path))
65        .map_err(|error| AppError::new(ErrorCode::Internal, format!("failed to read cwd: {error}")))
66}
67
68/// Canonicalize a path by resolving symlinks and normalizing components.
69pub fn canonicalize(path: &Path) -> AppResult<PathBuf> {
70    std::fs::canonicalize(path).map_err(|error| {
71        AppError::new(
72            ErrorCode::Internal,
73            format!("failed to canonicalize '{}': {error}", path.display()),
74        )
75    })
76}
77
78/// Resolve `root` against a base directory when relative, then canonicalize it.
79///
80/// Common when a config or manifest file declares a `root` that is either
81/// absolute or relative to the file's own directory. A relative `root` is joined
82/// to `base_dir`; an absolute `root` is used as-is. The result is canonicalized,
83/// so the returned path always exists (canonicalization resolves symlinks and
84/// requires the target to exist). When `root` is `None`, the current directory
85/// (`"."`) is resolved against `base_dir`.
86///
87/// `field` names the source field for error reporting.
88///
89/// This resolves and canonicalizes but does not confine: it does not reject a
90/// `root` that escapes `base_dir`. Use [`confine_path`] or [`confine_existing_path`]
91/// when the resolved path must stay within a trust boundary.
92///
93/// # Errors
94///
95/// Returns [`AppError`] when the resolved path cannot be canonicalized (for
96/// example, it does not exist), with the underlying cause preserved.
97pub fn resolve_root_relative_to(
98    field: &str,
99    base_dir: &Path,
100    root: Option<&Path>,
101) -> AppResult<PathBuf> {
102    let root = root.unwrap_or_else(|| Path::new("."));
103    let resolved = if root.is_absolute() {
104        root.to_path_buf()
105    } else {
106        base_dir.join(root)
107    };
108    canonicalize(&resolved).map_err(|error| {
109        AppError::invalid_input(
110            field,
111            format!("failed to resolve {field} '{}'", resolved.display()),
112        )
113        .with_cause(error)
114    })
115}
116
117/// Canonicalize an existing `path` and reject it when it resolves outside `root`.
118///
119/// Use this for existing untrusted file paths before handing them to lower-level IO
120/// or subprocess APIs. Both `root` and `path` are resolved through the filesystem so
121/// symlink escapes are rejected. Relative `path` values are interpreted under `root`;
122/// absolute `path` values are accepted only when their canonical destination is still
123/// within `root`.
124///
125/// # Errors
126///
127/// Returns an error when `root` or `path` cannot be canonicalized, when `root`
128/// is not a directory, or when `path` resolves outside the canonical root.
129pub fn confine_existing_path(root: &Path, path: &Path) -> AppResult<PathBuf> {
130    let root = canonicalize_directory_root(root)?;
131    let candidate = if path.is_absolute() {
132        path.to_path_buf()
133    } else {
134        root.join(path)
135    };
136    let candidate = canonicalize_confined_input(&candidate, "confined path")?;
137    ensure_confined(&root, &candidate)?;
138    Ok(candidate)
139}
140
141/// Resolve `path` under `root` and reject escapes, allowing the final path to be missing.
142///
143/// This is intended for output paths. The nearest existing ancestor is canonicalized to catch
144/// symlink escapes before new directories or files are created. Relative `path` values are
145/// interpreted under `root`; absolute `path` values are accepted only when their resolved
146/// existing ancestor remains within `root`.
147///
148/// # Errors
149///
150/// Returns an error when `root` cannot be canonicalized, `root` is not a directory, no existing
151/// ancestor can be found, an existing ancestor resolves outside `root`, a missing suffix would be
152/// appended below a non-directory ancestor, or a missing path segment is unsafe.
153pub fn confine_path(root: &Path, path: &Path) -> AppResult<PathBuf> {
154    let root = canonicalize_directory_root(root)?;
155    let candidate = if path.is_absolute() {
156        path.to_path_buf()
157    } else {
158        root.join(path)
159    };
160    let (existing, missing) = existing_ancestor_and_missing_suffix(&candidate)?;
161    let existing = canonicalize_existing_ancestor(&existing)?;
162    ensure_confined(&root, &existing)?;
163    ensure_directory_for_missing_suffix(&existing, &missing)?;
164
165    let resolved = append_safe_missing_suffix(existing, missing)?;
166    ensure_confined(&root, &resolved)?;
167    Ok(resolved)
168}
169
170fn existing_ancestor_and_missing_suffix(path: &Path) -> AppResult<(PathBuf, Vec<OsString>)> {
171    let mut missing = Vec::new();
172    let mut current = path.to_path_buf();
173    while !exists_without_following_symlinks(&current)? {
174        let Some(name) = current.file_name().map(OsString::from) else {
175            return Err(AppError::new(
176                ErrorCode::NotFound,
177                format!("no existing ancestor for '{}'", path.display()),
178            ));
179        };
180        missing.push(name);
181        let Some(parent) = current.parent() else {
182            return Err(AppError::new(
183                ErrorCode::NotFound,
184                format!("no existing ancestor for '{}'", path.display()),
185            ));
186        };
187        current = parent.to_path_buf();
188    }
189    missing.reverse();
190    Ok((current, missing))
191}
192
193fn canonicalize_directory_root(root: &Path) -> AppResult<PathBuf> {
194    let root = canonicalize_confined_input(root, "confined root")?;
195    let metadata = std::fs::metadata(&root).map_err(|error| {
196        AppError::new(
197            ErrorCode::Internal,
198            format!(
199                "failed to inspect confined root '{}': {error}",
200                root.display()
201            ),
202        )
203    })?;
204    if metadata.is_dir() {
205        return Ok(root);
206    }
207    Err(AppError::new(
208        ErrorCode::InvalidInput,
209        format!("confined root '{}' is not a directory", root.display()),
210    ))
211}
212
213fn canonicalize_confined_input(path: &Path, label: &str) -> AppResult<PathBuf> {
214    std::fs::canonicalize(path).map_err(|error| {
215        AppError::new(
216            confined_canonicalize_error_code(error.kind()),
217            format!(
218                "failed to canonicalize {label} '{}': {error}",
219                path.display()
220            ),
221        )
222    })
223}
224
225const fn confined_canonicalize_error_code(kind: ErrorKind) -> ErrorCode {
226    match kind {
227        ErrorKind::NotFound => ErrorCode::NotFound,
228        ErrorKind::InvalidInput | ErrorKind::NotADirectory => ErrorCode::InvalidInput,
229        _ => ErrorCode::Internal,
230    }
231}
232
233fn exists_without_following_symlinks(path: &Path) -> AppResult<bool> {
234    match std::fs::symlink_metadata(path) {
235        Ok(_) => Ok(true),
236        Err(error) if matches!(error.kind(), ErrorKind::NotFound | ErrorKind::NotADirectory) => {
237            Ok(false)
238        }
239        Err(error) => Err(AppError::new(
240            ErrorCode::Internal,
241            format!("failed to inspect '{}': {error}", path.display()),
242        )),
243    }
244}
245
246fn canonicalize_existing_ancestor(path: &Path) -> AppResult<PathBuf> {
247    canonicalize_confined_input(path, "existing path ancestor").map_err(|error| {
248        AppError::new(
249            error.code(),
250            format!(
251                "existing path ancestor '{}' cannot be resolved: {}",
252                path.display(),
253                error.message()
254            ),
255        )
256    })
257}
258
259fn ensure_directory_for_missing_suffix(existing: &Path, missing: &[OsString]) -> AppResult<()> {
260    if missing.is_empty() {
261        return Ok(());
262    }
263    let metadata = std::fs::metadata(existing).map_err(|error| {
264        AppError::new(
265            ErrorCode::Internal,
266            format!(
267                "failed to inspect existing path ancestor '{}': {error}",
268                existing.display()
269            ),
270        )
271    })?;
272    if metadata.is_dir() {
273        return Ok(());
274    }
275    Err(AppError::new(
276        ErrorCode::InvalidInput,
277        format!(
278            "existing path ancestor '{}' is not a directory",
279            existing.display()
280        ),
281    ))
282}
283
284fn append_safe_missing_suffix(mut base: PathBuf, missing: Vec<OsString>) -> AppResult<PathBuf> {
285    for segment in missing {
286        let segment_path = Path::new(&segment);
287        validate_relative_path(segment_path).map_err(|error| {
288            AppError::new(
289                ErrorCode::InvalidInput,
290                format!(
291                    "path segment '{}' is not safe: {error}",
292                    segment_path.display()
293                ),
294            )
295        })?;
296        let mut components = segment_path.components();
297        if !matches!(components.next(), Some(Component::Normal(_))) || components.next().is_some() {
298            return Err(AppError::new(
299                ErrorCode::InvalidInput,
300                format!("path segment '{}' is not safe", segment_path.display()),
301            ));
302        }
303        base.push(segment);
304    }
305    Ok(base)
306}
307
308fn ensure_confined(root: &Path, path: &Path) -> AppResult<()> {
309    if path.starts_with(root) {
310        return Ok(());
311    }
312    Err(AppError::new(
313        ErrorCode::InvalidInput,
314        format!(
315            "path '{}' resolves outside confined root '{}'",
316            path.display(),
317            root.display()
318        ),
319    ))
320}
321
/// Return the parent directory of `path`, treating an empty parent as absent.
///
/// `Path::parent` yields an empty path for a bare file name such as `file.txt`; this
/// helper returns `None` in that case, as well as when `path` has no parent at all.
#[must_use]
pub fn parent_dir(path: &Path) -> Option<&Path> {
    match path.parent() {
        Some(parent) if !parent.as_os_str().is_empty() => Some(parent),
        _ => None,
    }
}
328
#[cfg(test)]
mod tests {
    // Unit tests for the path helpers. Filesystem-backed cases use the crate's
    // `TempDir` fixture so every test works inside its own scratch directory.

    use std::ffi::OsString;
    use std::path::Path;

    use rskit_errors::ErrorCode;

    use super::{
        SafePathError, absolute, append_safe_missing_suffix, canonicalize, confine_existing_path,
        confine_path, resolve_root_relative_to, safe_join, validate_relative_path,
    };

    // --- Lexical validation ---------------------------------------------------------

    #[test]
    fn validates_safe_relative_paths() {
        assert!(validate_relative_path(Path::new("a/b.txt")).is_ok());
        assert!(validate_relative_path(Path::new("./a/b.txt")).is_ok());
    }

    #[test]
    fn rejects_absolute_paths() {
        assert_eq!(
            validate_relative_path(Path::new("/etc/passwd")).unwrap_err(),
            SafePathError::Absolute
        );
    }

    #[test]
    fn rejects_parent_dir_paths() {
        assert_eq!(
            validate_relative_path(Path::new("../escape")).unwrap_err(),
            SafePathError::ParentDir
        );
    }

    // Path prefixes only exist on Windows, so this case cannot be expressed elsewhere.
    #[cfg(windows)]
    #[test]
    fn rejects_prefixed_paths() {
        assert_eq!(
            validate_relative_path(Path::new("C:escape")).unwrap_err(),
            SafePathError::Prefix
        );
    }

    #[test]
    fn displays_safe_path_errors() {
        assert_eq!(
            SafePathError::Absolute.to_string(),
            "path must be relative, not absolute"
        );
        assert_eq!(
            SafePathError::ParentDir.to_string(),
            "path must not contain '..' segments"
        );
        assert_eq!(
            SafePathError::Prefix.to_string(),
            "path must not contain a platform path prefix"
        );
    }

    #[test]
    fn safe_join_keeps_paths_under_root() {
        assert_eq!(
            safe_join(Path::new("/root"), "a/b.txt").unwrap(),
            Path::new("/root").join("a/b.txt")
        );
    }

    // --- absolute / canonicalize ----------------------------------------------------

    #[test]
    fn absolute_resolves_relative_paths() {
        let path = absolute(Path::new("a/b.txt")).unwrap();
        assert!(path.is_absolute());
        assert!(path.ends_with("a/b.txt"));
    }

    #[test]
    fn absolute_returns_absolute_paths_unchanged() {
        // Build the input from the current directory instead of a literal such as
        // `/tmp/a.txt`: a rooted path without a drive prefix is not absolute on
        // Windows, so the literal would be re-rooted there and the test would fail.
        let path = std::env::current_dir().unwrap().join("a.txt");
        assert!(path.is_absolute());
        assert_eq!(absolute(&path).unwrap(), path);
    }

    #[test]
    fn canonicalize_resolves_existing_paths_and_reports_missing() {
        let dir = crate::TempDir::new().unwrap();
        let file = dir.write_file("file.txt", b"hello").unwrap();

        assert_eq!(
            canonicalize(&file).unwrap(),
            std::fs::canonicalize(&file).unwrap()
        );
        assert!(canonicalize(&dir.child("missing.txt").unwrap()).is_err());
    }

    // --- confine_existing_path ------------------------------------------------------

    #[test]
    fn confines_existing_paths_under_root() {
        let dir = crate::TempDir::new().unwrap();
        let file = dir.write_file("nested/file.txt", b"hello").unwrap();

        let confined = confine_existing_path(dir.path(), Path::new("nested/file.txt")).unwrap();

        assert_eq!(confined, std::fs::canonicalize(file).unwrap());
    }

    #[test]
    fn rejects_existing_paths_outside_root() {
        let root = crate::TempDir::new().unwrap();
        let outside = crate::TempDir::new().unwrap();
        let file = outside.write_file("file.txt", b"hello").unwrap();

        let error = confine_existing_path(root.path(), &file).unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
    }

    #[test]
    fn rejects_missing_existing_paths_as_not_found() {
        let root = crate::TempDir::new().unwrap();

        let error = confine_existing_path(root.path(), Path::new("missing.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::NotFound);
    }

    #[test]
    fn rejects_missing_confined_roots_as_not_found() {
        let dir = crate::TempDir::new().unwrap();
        let missing_root = dir.child("missing-root").unwrap();

        let error = confine_existing_path(&missing_root, Path::new("file.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::NotFound);
    }

    #[test]
    fn rejects_file_root_for_existing_paths() {
        let dir = crate::TempDir::new().unwrap();
        let root_file = dir.write_file("root.txt", b"not a dir").unwrap();

        let error = confine_existing_path(&root_file, Path::new("child.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
    }

    // --- confine_path ---------------------------------------------------------------

    #[test]
    fn confines_missing_output_paths_under_existing_parent() {
        let dir = crate::TempDir::new().unwrap();

        let confined = confine_path(dir.path(), Path::new("nested/output.txt")).unwrap();

        assert!(confined.starts_with(std::fs::canonicalize(dir.path()).unwrap()));
        assert!(confined.ends_with("nested/output.txt"));
    }

    #[test]
    fn rejects_file_root_for_output_paths() {
        let dir = crate::TempDir::new().unwrap();
        let root_file = dir.write_file("root.txt", b"not a dir").unwrap();

        let error = confine_path(&root_file, Path::new("output.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
    }

    #[test]
    fn rejects_missing_output_paths_below_existing_file() {
        let dir = crate::TempDir::new().unwrap();
        dir.write_file("file.txt", b"not a dir").unwrap();

        let error = confine_path(dir.path(), Path::new("file.txt/output.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
    }

    #[test]
    fn rejects_curdir_missing_path_segments() {
        let dir = crate::TempDir::new().unwrap();

        // `.` passes the lexical relative-path check but is not a plain name.
        let error = append_safe_missing_suffix(dir.path().to_path_buf(), vec![OsString::from(".")])
            .unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
    }

    #[cfg(unix)]
    #[test]
    fn rejects_missing_paths_below_symlink_escape() {
        let root = crate::TempDir::new().unwrap();
        let outside = crate::TempDir::new().unwrap();
        let link = root.child("link").unwrap();
        std::os::unix::fs::symlink(outside.path(), &link).unwrap();

        let error = confine_path(root.path(), Path::new("link/output.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
    }

    #[cfg(unix)]
    #[test]
    fn rejects_missing_paths_below_broken_symlink() {
        let root = crate::TempDir::new().unwrap();
        let link = root.child("broken-link").unwrap();
        let target = root.child("missing-target").unwrap();
        std::os::unix::fs::symlink(target, &link).unwrap();

        let error = confine_path(root.path(), Path::new("broken-link/output.txt")).unwrap_err();

        assert_eq!(error.code(), ErrorCode::NotFound);
    }

    // --- resolve_root_relative_to ---------------------------------------------------

    #[test]
    fn resolve_root_defaults_to_base_dir() {
        let dir = crate::TempDir::new().unwrap();

        let root = resolve_root_relative_to("root", dir.path(), None).unwrap();

        assert_eq!(root, canonicalize(dir.path()).unwrap());
    }

    #[test]
    fn resolve_root_joins_relative_against_base_dir() {
        let dir = crate::TempDir::new().unwrap();
        let workspace = dir.path().join("workspace");
        std::fs::create_dir(&workspace).unwrap();

        let root =
            resolve_root_relative_to("root", dir.path(), Some(Path::new("workspace"))).unwrap();

        assert_eq!(root, canonicalize(&workspace).unwrap());
    }

    #[test]
    fn resolve_root_accepts_absolute_root() {
        let base = crate::TempDir::new().unwrap();
        let target = crate::TempDir::new().unwrap();

        let root = resolve_root_relative_to("root", base.path(), Some(target.path())).unwrap();

        assert_eq!(root, canonicalize(target.path()).unwrap());
    }

    #[test]
    fn resolve_root_surfaces_canonicalization_failure() {
        let dir = crate::TempDir::new().unwrap();

        let error =
            resolve_root_relative_to("root", dir.path(), Some(Path::new("missing"))).unwrap_err();

        assert_eq!(error.code(), ErrorCode::InvalidInput);
        assert!(error.message().contains("failed to resolve root"));
    }
}