Skip to main content

simdutf8_cli/
path_security.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: 2025,2026 ndaal Gesellschaft für Sicherheit in der Informationstechnik mbH & Co KG, Cologne
3// SPDX-FileCopyrightText: Author: Pierre Gronau <Pierre.Gronau@ndaal.eu>
4
5//! Hardened file access for untrusted path arguments.
6//!
7//! A CLI that opens arbitrary user-supplied paths must defend against a number
8//! of classic attacks. This module centralises that policy so the rest of the
9//! program never touches the filesystem directly:
10//!
11//! * **Path traversal** — when a base directory is configured, a `..` component
12//!   is rejected lexically *and* the fully resolved path is verified to stay
13//!   inside the base directory.
14//! * **Symlink escapes / TOCTOU** — paths are [canonicalized](std::fs::canonicalize)
15//!   (resolving symlinks and `..`) before the containment check, and a symlinked
16//!   final component can be denied outright. The opened file handle's own metadata is then
17//!   re-checked (`fstat` on the descriptor) so the type/size decision is made on
18//!   the object we actually opened, not on a name that may have been swapped.
19//! * **Non-regular files** — directories, devices, FIFOs and sockets are
20//!   rejected; only regular files are accepted.
21//! * **Resource exhaustion** — reads are hard-capped at a configurable byte
22//!   limit, independent of the size the filesystem metadata claims.
23//!
24//! The entry points are [`PathPolicy::open`] / [`PathPolicy::read`] for files on
25//! disk, [`read_capped`] for arbitrary readers such as standard input,
26//! [`write_in_dir`] for capability-scoped writes (via `cap-std`), and the
27//! lexical [`safe_join`] primitive for confining attacker-influenced relative
28//! paths to a base directory.
29
30use std::fs::File;
31use std::io::{Read, Write};
32use std::path::{Component, Path, PathBuf};
33
34use cap_std::ambient_authority;
35use cap_std::fs::Dir;
36
/// Default per-input read ceiling: 64 MiB (`64 << 20` = 67 108 864 bytes).
///
/// [`PathPolicy::new`] installs this value as its byte limit.
pub const DEFAULT_MAX_FILE_SIZE: u64 = 64 << 20;
39
40/// Errors that can occur while securely resolving and opening an input path.
41#[derive(Debug, thiserror::Error)]
42pub enum PathSecurityError {
43    /// The supplied path was empty.
44    #[error("input path is empty")]
45    EmptyPath,
46
47    /// The supplied path contained an interior NUL byte.
48    #[error("input path contains an interior NUL byte")]
49    InteriorNul,
50
51    /// A `..` component was present while a base directory was configured.
52    #[error("input path contains a '..' component, which is not allowed with --base-dir: {}", .0.display())]
53    ParentTraversal(PathBuf),
54
55    /// The resolved path lay outside the configured base directory.
56    #[error("resolved path {} escapes the permitted base directory {}", .path.display(), .base.display())]
57    OutsideBase {
58        /// The fully resolved (canonical) path that was rejected.
59        path: PathBuf,
60        /// The configured base directory (canonical).
61        base: PathBuf,
62    },
63
64    /// A symbolic link was encountered while symlinks were disallowed.
65    #[error("symbolic links are not permitted: {}", .0.display())]
66    SymlinkDenied(PathBuf),
67
68    /// The path resolved to something other than a regular file.
69    #[error("not a regular file: {}", .0.display())]
70    NotRegularFile(PathBuf),
71
72    /// The input exceeded the configured byte limit.
73    #[error("input is too large: {size} bytes exceeds the {limit} byte limit")]
74    TooLarge {
75        /// Observed size in bytes (at least `limit + 1` for streamed inputs).
76        size: u64,
77        /// The configured limit in bytes.
78        limit: u64,
79    },
80
81    /// An underlying I/O error occurred while accessing the path.
82    #[error("failed to access {}: {source}", .path.display())]
83    Io {
84        /// The path that was being accessed.
85        path: PathBuf,
86        /// The underlying I/O error.
87        #[source]
88        source: std::io::Error,
89    },
90}
91
/// Result of a successful [`PathPolicy::open`]: the handle plus what was
/// learned about it while validating.
#[derive(Debug)]
pub struct OpenedFile {
    /// Open handle; its metadata was checked to describe a regular file.
    pub file: File,
    /// Canonical (absolute, symlink-free) path the handle was opened from.
    pub path: PathBuf,
    /// Length in bytes taken from the handle's metadata at open time.
    pub size: u64,
}
102
/// Rules applied when an input path is resolved and opened.
///
/// Start from [`PathPolicy::new`] (equivalently [`Default`]) and adjust the
/// individual settings through the chained builder methods.
#[derive(Clone, Debug)]
pub struct PathPolicy {
    // Directory every resolved input must stay inside; `None` = unconfined.
    base_dir: Option<PathBuf>,
    // Whether a symlinked input path is accepted.
    allow_symlinks: bool,
    // Maximum number of bytes accepted from one input.
    max_file_size: u64,
}
113
114impl Default for PathPolicy {
115    fn default() -> Self {
116        Self::new()
117    }
118}
119
120impl PathPolicy {
121    /// Create a policy with safe defaults: no base-directory confinement,
122    /// symlinks allowed (but resolved and re-checked), and a
123    /// [`DEFAULT_MAX_FILE_SIZE`] byte cap.
124    #[must_use]
125    pub const fn new() -> Self {
126        Self {
127            base_dir: None,
128            allow_symlinks: true,
129            max_file_size: DEFAULT_MAX_FILE_SIZE,
130        }
131    }
132
133    /// Confine all inputs to `base`: resolved paths must stay within it.
134    #[must_use]
135    pub fn base_dir(mut self, base: impl Into<PathBuf>) -> Self {
136        self.base_dir = Some(base.into());
137        self
138    }
139
140    /// Allow (`true`, the default) or deny (`false`) symbolic links.
141    #[must_use]
142    pub const fn allow_symlinks(mut self, allow: bool) -> Self {
143        self.allow_symlinks = allow;
144        self
145    }
146
147    /// Set the maximum number of bytes that may be read from a single input.
148    #[must_use]
149    pub const fn max_file_size(mut self, limit: u64) -> Self {
150        self.max_file_size = limit;
151        self
152    }
153
154    /// The configured byte limit.
155    #[must_use]
156    pub const fn limit(&self) -> u64 {
157        self.max_file_size
158    }
159
160    /// Resolve, validate and open `requested`, returning an [`OpenedFile`].
161    ///
162    /// # Errors
163    ///
164    /// Returns a [`PathSecurityError`] if the path is empty, contains an
165    /// interior NUL, traverses outside the configured base directory, is a
166    /// disallowed symlink, is not a regular file, exceeds the size limit, or
167    /// cannot be accessed.
168    pub fn open(&self, requested: &Path) -> Result<OpenedFile, PathSecurityError> {
169        let canonical = self.resolve_path(requested)?;
170
171        // Open the canonical path, then re-derive type and size from the open
172        // descriptor itself (fstat) so the decision is made on the object we hold
173        // open rather than on a name that could have changed (TOCTOU mitigation).
174        let file = File::open(&canonical).map_err(|source| PathSecurityError::Io {
175            path: canonical.clone(),
176            source,
177        })?;
178        let meta = file.metadata().map_err(|source| PathSecurityError::Io {
179            path: canonical.clone(),
180            source,
181        })?;
182        if !meta.is_file() {
183            return Err(PathSecurityError::NotRegularFile(canonical));
184        }
185
186        // Enforce the size limit (the subsequent read is hard-capped too).
187        let size = meta.len();
188        if size > self.max_file_size {
189            return Err(PathSecurityError::TooLarge {
190                size,
191                limit: self.max_file_size,
192            });
193        }
194
195        Ok(OpenedFile {
196            file,
197            path: canonical,
198            size,
199        })
200    }
201
202    /// Validate `requested` and resolve it to a canonical path that is confined
203    /// to the configured base directory (if any). Performs no file open.
204    ///
205    /// # Errors
206    ///
207    /// Returns a [`PathSecurityError`] for an empty/NUL path, a `..` traversal,
208    /// a disallowed symlink, a path escaping the base directory, or I/O failure.
209    fn resolve_path(&self, requested: &Path) -> Result<PathBuf, PathSecurityError> {
210        // 1. Reject an empty path outright.
211        if requested.as_os_str().is_empty() {
212            return Err(PathSecurityError::EmptyPath);
213        }
214
215        // 2. Reject interior NUL bytes before touching the filesystem.
216        if requested.as_os_str().as_encoded_bytes().contains(&0) {
217            return Err(PathSecurityError::InteriorNul);
218        }
219
220        // 3. With a base directory, reject `..` lexically as defence in depth
221        //    (the canonical containment check below is the authoritative one).
222        if self.base_dir.is_some()
223            && requested
224                .components()
225                .any(|component| matches!(component, Component::ParentDir))
226        {
227            return Err(PathSecurityError::ParentTraversal(requested.to_path_buf()));
228        }
229
230        // 4. If symlinks are disallowed, reject a symlinked final component.
231        if !self.allow_symlinks {
232            let meta =
233                std::fs::symlink_metadata(requested).map_err(|source| PathSecurityError::Io {
234                    path: requested.to_path_buf(),
235                    source,
236                })?;
237            if meta.file_type().is_symlink() {
238                return Err(PathSecurityError::SymlinkDenied(requested.to_path_buf()));
239            }
240        }
241
242        // 5. Canonicalize: resolves `.`/`..` and every symlink, yielding an
243        //    absolute path to the real object on disk.
244        let canonical = requested
245            .canonicalize()
246            .map_err(|source| PathSecurityError::Io {
247                path: requested.to_path_buf(),
248                source,
249            })?;
250
251        // 6. Containment: the resolved path must live inside the resolved base.
252        if let Some(base) = &self.base_dir {
253            let canonical_base = base
254                .canonicalize()
255                .map_err(|source| PathSecurityError::Io {
256                    path: base.clone(),
257                    source,
258                })?;
259            if !canonical.starts_with(&canonical_base) {
260                return Err(PathSecurityError::OutsideBase {
261                    path: canonical,
262                    base: canonical_base,
263                });
264            }
265        }
266
267        Ok(canonical)
268    }
269
270    /// Open `requested` and read its contents, hard-capped at the byte limit.
271    ///
272    /// # Errors
273    ///
274    /// As for [`PathPolicy::open`], plus [`PathSecurityError::TooLarge`] if the
275    /// file streams more than the configured limit.
276    pub fn read(&self, requested: &Path) -> Result<Vec<u8>, PathSecurityError> {
277        let OpenedFile { file, path, .. } = self.open(requested)?;
278        read_capped(file, self.max_file_size).map_err(move |error| match error {
279            // Replace the placeholder stream path with the real file path.
280            PathSecurityError::Io { source, .. } => PathSecurityError::Io { path, source },
281            other => other,
282        })
283    }
284}
285
286/// Read at most `limit` bytes from `reader`, erroring if more are available.
287///
288/// This bounds memory use for streamed inputs (e.g. standard input) where no
289/// size is known in advance.
290///
291/// # Errors
292///
293/// Returns [`PathSecurityError::TooLarge`] if `reader` yields more than `limit`
294/// bytes, or [`PathSecurityError::Io`] on an underlying read error.
295pub fn read_capped<R: Read>(reader: R, limit: u64) -> Result<Vec<u8>, PathSecurityError> {
296    // Read at most `limit + 1` bytes: the extra byte lets us detect overflow
297    // without trusting any externally reported size.
298    let mut limited = reader.take(limit.saturating_add(1));
299    let mut buf = Vec::new();
300    limited
301        .read_to_end(&mut buf)
302        .map_err(|source| PathSecurityError::Io {
303            path: PathBuf::from("<stream>"),
304            source,
305        })?;
306
307    let len = u64::try_from(buf.len()).unwrap_or(u64::MAX);
308    if len > limit {
309        return Err(PathSecurityError::TooLarge { size: len, limit });
310    }
311    Ok(buf)
312}
313
314/// Write `bytes` to a file named `name` inside the directory `dir`, using a
315/// capability-scoped [`cap_std::fs::Dir`] handle (see `skills/rust-path-security.md`).
316///
317/// `name` must be a single relative file name; `cap-std` rejects any `..`,
318/// absolute path, or symlink that would escape `dir` at the syscall layer, so a
319/// crafted report file name cannot redirect the write outside `dir`. This is the
320/// one place ambient authority crosses into the program for writes.
321///
322/// # Errors
323///
324/// Returns [`PathSecurityError::Io`] if `dir` cannot be opened or the file
325/// cannot be created/written.
326pub fn write_in_dir(dir: &Path, name: &str, bytes: &[u8]) -> Result<(), PathSecurityError> {
327    let handle = Dir::open_ambient_dir(dir, ambient_authority()).map_err(|source| {
328        PathSecurityError::Io {
329            path: dir.to_path_buf(),
330            source,
331        }
332    })?;
333    let mut file = handle
334        .create(name)
335        .map_err(|source| PathSecurityError::Io {
336            path: dir.join(name),
337            source,
338        })?;
339    file.write_all(bytes)
340        .map_err(|source| PathSecurityError::Io {
341            path: dir.join(name),
342            source,
343        })?;
344    Ok(())
345}
346
/// Append an untrusted relative `candidate` to `base` purely lexically,
/// yielding `None` whenever the result could leave `base`.
///
/// This is a *string-level* primitive (see `skills/rust-path-security.md`):
/// nothing on disk is consulted and symlinks are not resolved, which makes it
/// the auditable counterpart to the syscall-level confinement provided by
/// [`PathPolicy`] / [`cap_std`]. The candidate is refused when it:
///
/// * contains a NUL byte;
/// * is absolute or starts with a path prefix (e.g. a Windows drive / UNC); or
/// * has a `..` with no earlier segment left to cancel (so `a/../b` is
///   accepted, `a/../../b` is not).
///
/// `.` components are dropped. A `Some` result always starts with `base` and
/// contains no `..` component; consequently a `base` that itself contains
/// `..` never produces `Some`.
#[must_use]
pub fn safe_join(base: &Path, candidate: &str) -> Option<PathBuf> {
    if candidate.bytes().any(|byte| byte == 0) {
        return None;
    }

    // Segments that survive normalisation, borrowed straight from `candidate`.
    let mut kept: Vec<&std::ffi::OsStr> = Vec::new();
    for part in Path::new(candidate).components() {
        match part {
            Component::Prefix(_) | Component::RootDir => return None,
            Component::CurDir => {},
            Component::ParentDir => {
                // Nothing left to cancel means the `..` would climb out of `base`.
                kept.pop()?;
            },
            Component::Normal(name) => kept.push(name),
        }
    }

    let mut joined = base.to_path_buf();
    joined.extend(kept);

    // Redundant by construction; kept as explicit post-conditions that the
    // fuzz target asserts as well.
    let stays_inside = joined.starts_with(base);
    let has_parent = joined
        .components()
        .any(|part| matches!(part, Component::ParentDir));
    (stays_inside && !has_parent).then_some(joined)
}
398
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// Create `name` inside `dir` holding `bytes`; returns the file's path.
    fn write_temp(dir: &Path, name: &str, bytes: &[u8]) -> PathBuf {
        let target = dir.join(name);
        File::create(&target)
            .expect("create temp file")
            .write_all(bytes)
            .expect("write temp file");
        target
    }

    // -- PathPolicy::open / PathPolicy::read ----------------------------------

    #[test]
    fn opens_and_reads_a_regular_file() {
        let scratch = tempfile::tempdir().unwrap();
        let file_path = write_temp(scratch.path(), "hello.txt", b"hello");
        let policy = PathPolicy::new();

        let opened = policy.open(&file_path).expect("open should succeed");
        assert_eq!(opened.size, 5);
        assert!(opened.path.is_absolute());

        let contents = policy.read(&file_path).expect("read should succeed");
        assert_eq!(contents, b"hello");
    }

    #[test]
    fn empty_path_is_rejected() {
        let outcome = PathPolicy::new().open(Path::new(""));
        assert!(matches!(outcome.unwrap_err(), PathSecurityError::EmptyPath));
    }

    #[test]
    fn missing_file_is_io_error() {
        let scratch = tempfile::tempdir().unwrap();
        let absent = scratch.path().join("nope");
        let err = PathPolicy::new().open(&absent).unwrap_err();
        assert!(matches!(err, PathSecurityError::Io { .. }), "got: {err:?}");
    }

    #[test]
    fn directory_is_not_a_regular_file() {
        let scratch = tempfile::tempdir().unwrap();
        let err = PathPolicy::new().open(scratch.path()).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::NotRegularFile(_)),
            "got: {err:?}"
        );
    }

    #[test]
    fn oversize_file_is_rejected() {
        let scratch = tempfile::tempdir().unwrap();
        let ten_bytes = write_temp(scratch.path(), "big.bin", b"0123456789");
        let tight = PathPolicy::new().max_file_size(4);
        let err = tight.open(&ten_bytes).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::TooLarge { limit: 4, .. }),
            "got: {err:?}"
        );
    }

    #[test]
    fn read_is_capped() {
        let scratch = tempfile::tempdir().unwrap();
        let ten_bytes = write_temp(scratch.path(), "data.bin", b"0123456789");
        let tight = PathPolicy::new().max_file_size(4);
        let err = tight.read(&ten_bytes).unwrap_err();
        assert!(matches!(err, PathSecurityError::TooLarge { .. }));
    }

    // -- read_capped ----------------------------------------------------------

    #[test]
    fn read_capped_accepts_within_limit() {
        let source: &[u8] = b"hello";
        let contents = read_capped(source, 10).unwrap();
        assert_eq!(contents, b"hello");
    }

    #[test]
    fn read_capped_rejects_over_limit() {
        let source: &[u8] = b"hello";
        let err = read_capped(source, 3).unwrap_err();
        assert!(matches!(err, PathSecurityError::TooLarge { limit: 3, .. }));
    }

    // -- base-directory confinement -------------------------------------------

    #[test]
    fn base_dir_allows_contained_file() {
        let jail = tempfile::tempdir().unwrap();
        let inside = write_temp(jail.path(), "inside.txt", b"ok");
        let policy = PathPolicy::new().base_dir(jail.path());

        let opened = policy.open(&inside).expect("contained file should open");
        let canonical_jail = jail.path().canonicalize().unwrap();
        assert!(opened.path.starts_with(canonical_jail));
    }

    #[test]
    fn base_dir_rejects_parent_traversal() {
        let base = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let _secret = write_temp(outside.path(), "secret.txt", b"top secret");
        let policy = PathPolicy::new().base_dir(base.path());

        // <base>/../<outside-dir-name>/secret.txt: a lexical climb out of base.
        let mut attack = base.path().to_path_buf();
        attack.push("..");
        attack.push(outside.path().file_name().unwrap());
        attack.push("secret.txt");

        let err = policy.open(&attack).unwrap_err();
        assert!(
            matches!(
                err,
                PathSecurityError::ParentTraversal(_) | PathSecurityError::OutsideBase { .. }
            ),
            "got: {err:?}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn base_dir_rejects_symlink_escape() {
        use std::os::unix::fs::symlink;

        let base = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let secret = write_temp(outside.path(), "secret.txt", b"top secret");

        // A link that lives inside the base but points at the outside file.
        let link = base.path().join("link.txt");
        symlink(&secret, &link).unwrap();

        // Symlinks stay enabled here; it is the resolved target that escapes.
        let err = PathPolicy::new()
            .base_dir(base.path())
            .open(&link)
            .unwrap_err();
        assert!(
            matches!(err, PathSecurityError::OutsideBase { .. }),
            "got: {err:?}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn symlinks_can_be_denied() {
        use std::os::unix::fs::symlink;

        let scratch = tempfile::tempdir().unwrap();
        let target = write_temp(scratch.path(), "target.txt", b"data");
        let link = scratch.path().join("link.txt");
        symlink(&target, &link).unwrap();

        let strict = PathPolicy::new().allow_symlinks(false);
        let err = strict.open(&link).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::SymlinkDenied(_)),
            "got: {err:?}"
        );
    }

    #[cfg(unix)]
    #[test]
    fn interior_nul_is_rejected() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        let poisoned = Path::new(OsStr::from_bytes(b"a\0b"));
        let err = PathPolicy::new().open(poisoned).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::InteriorNul),
            "got: {err:?}"
        );
    }

    // -- safe_join (lexical confinement) --------------------------------------

    const BASE: &str = "/var/lib/simdutf8-cli/data";

    /// The fixed base directory used by every `safe_join` test.
    fn base() -> PathBuf {
        PathBuf::from(BASE)
    }

    #[test]
    fn safe_join_accepts_well_formed_relative_paths() {
        let well_formed = [
            "advisory.json",
            "2026/001/file.json",
            "./a/./b.json",
            ".hidden",
            "",
        ];
        for candidate in well_formed {
            let resolved = safe_join(&base(), candidate)
                .unwrap_or_else(|| panic!("expected accept for {candidate:?}"));
            assert!(resolved.starts_with(base()), "{candidate:?} escaped base");
            assert!(!resolved
                .components()
                .any(|part| matches!(part, Component::ParentDir)));
        }
    }

    #[test]
    fn safe_join_accepts_balanced_parent() {
        let root = base();
        assert_eq!(safe_join(&root, "a/../b.json"), Some(root.join("b.json")));
        assert_eq!(safe_join(&root, "2026/.."), Some(root.clone()));
    }

    #[test]
    fn safe_join_rejects_traversal_and_absolute_and_nul() {
        let hostile = [
            "..",
            "../etc/passwd",
            "../../../../etc/passwd",
            "2026/../../etc/passwd",
            "/etc/passwd",
            "advisory.json\0",
            "a\0b",
        ];
        for candidate in hostile {
            assert!(
                safe_join(&base(), candidate).is_none(),
                "expected reject for {candidate:?}"
            );
        }
    }

    #[test]
    fn safe_join_rejects_every_traversal_depth() {
        for depth in 1..=64 {
            let attack = format!("{}etc/passwd", "../".repeat(depth));
            assert!(
                safe_join(&base(), &attack).is_none(),
                "depth {depth} should be rejected"
            );
        }
    }
}