use std::fs::File;
use std::io::{Read, Write};
use std::path::{Component, Path, PathBuf};
use cap_std::ambient_authority;
use cap_std::fs::Dir;
/// Size limit, in bytes, that [`PathPolicy::new`] installs by default: 64 MiB.
pub const DEFAULT_MAX_FILE_SIZE: u64 = 64 * 1024 * 1024;
/// Errors produced while validating, opening, reading, or writing a path.
///
/// The `Display` text of every variant comes from its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum PathSecurityError {
/// The requested path was the empty string.
#[error("input path is empty")]
EmptyPath,
/// The requested path's encoded bytes contain a `0` byte.
#[error("input path contains an interior NUL byte")]
InteriorNul,
/// A base directory is configured and the requested path contains a `..`
/// component; carries the path exactly as it was requested.
#[error("input path contains a '..' component, which is not allowed with --base-dir: {}", .0.display())]
ParentTraversal(PathBuf),
/// The canonicalized path does not start with the canonicalized base directory.
#[error("resolved path {} escapes the permitted base directory {}", .path.display(), .base.display())]
OutsideBase {
/// Canonical form of the requested path.
path: PathBuf,
/// Canonical form of the configured base directory.
base: PathBuf,
},
/// Symlinks are disallowed by the policy and the requested path is itself a symlink.
#[error("symbolic links are not permitted: {}", .0.display())]
SymlinkDenied(PathBuf),
/// The opened path's metadata does not describe a regular file.
#[error("not a regular file: {}", .0.display())]
NotRegularFile(PathBuf),
/// The file or stream holds more bytes than the configured limit.
#[error("input is too large: {size} bytes exceeds the {limit} byte limit")]
TooLarge {
/// Observed size in bytes. For capped stream reads this is the number of
/// bytes actually read, which is at most `limit + 1`.
size: u64,
/// The limit, in bytes, that was exceeded.
limit: u64,
},
/// An underlying I/O operation failed for `path`.
#[error("failed to access {}: {source}", .path.display())]
Io {
/// Path the failing operation was applied to (`<stream>` for anonymous readers).
path: PathBuf,
/// The originating I/O error, exposed as the error source.
#[source]
source: std::io::Error,
},
}
/// Result of a successful [`PathPolicy::open`]: the handle plus what was learned about it.
#[derive(Debug)]
pub struct OpenedFile {
/// Open, readable handle to the file.
pub file: File,
/// Canonicalized path that was opened.
pub path: PathBuf,
/// File length in bytes, taken from the handle's metadata when it was opened.
pub size: u64,
}
/// Rules applied to a user-supplied path before it is opened or read.
///
/// Configure it with the builder-style methods and then call `open` or `read`.
#[derive(Clone, Debug)]
pub struct PathPolicy {
// When set, resolved paths must live under this directory.
base_dir: Option<PathBuf>,
// When false, a requested path that is itself a symlink is rejected.
allow_symlinks: bool,
// Largest accepted file size, in bytes.
max_file_size: u64,
}
impl Default for PathPolicy {
fn default() -> Self {
Self::new()
}
}
impl PathPolicy {
    /// Creates the default policy: no base directory, symlinks allowed, and a
    /// size limit of [`DEFAULT_MAX_FILE_SIZE`] bytes.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            base_dir: None,
            allow_symlinks: true,
            max_file_size: DEFAULT_MAX_FILE_SIZE,
        }
    }

    /// Restricts accepted paths to those whose canonical form lies under `base`.
    ///
    /// While a base directory is set, any requested path containing a `..`
    /// component is rejected outright, before the filesystem is consulted.
    #[must_use]
    pub fn base_dir(mut self, base: impl Into<PathBuf>) -> Self {
        self.base_dir = Some(base.into());
        self
    }

    /// Sets whether the requested path may itself be a symbolic link.
    ///
    /// Only the final component of the requested path is inspected; symlinks in
    /// intermediate directories are still followed during canonicalization.
    #[must_use]
    pub const fn allow_symlinks(mut self, allow: bool) -> Self {
        self.allow_symlinks = allow;
        self
    }

    /// Sets the largest file size, in bytes, that `open` and `read` accept.
    #[must_use]
    pub const fn max_file_size(mut self, limit: u64) -> Self {
        self.max_file_size = limit;
        self
    }

    /// Returns the configured maximum file size in bytes.
    #[must_use]
    pub const fn limit(&self) -> u64 {
        self.max_file_size
    }

    /// Validates `requested` against the policy and opens it for reading.
    ///
    /// On success the returned [`OpenedFile`] carries the open handle, the
    /// canonical path, and the size reported by the handle's metadata.
    ///
    /// # Errors
    ///
    /// * Any error produced while resolving the path (empty path, NUL byte,
    ///   `..` with a base directory, denied symlink, escape from the base
    ///   directory, or an I/O failure during resolution).
    /// * [`PathSecurityError::Io`] if the target cannot be inspected or opened.
    /// * [`PathSecurityError::NotRegularFile`] if the target is not a regular file.
    /// * [`PathSecurityError::TooLarge`] if the file exceeds the configured limit.
    pub fn open(&self, requested: &Path) -> Result<OpenedFile, PathSecurityError> {
        let canonical = self.resolve_path(requested)?;

        // Classify the target *before* opening it. Opening a FIFO read-only can
        // block until a writer shows up, and on Windows opening a directory fails
        // with a plain I/O error, so without this check the caller would hang or
        // get `Io` instead of `NotRegularFile`.
        let preflight = std::fs::metadata(&canonical).map_err(|source| PathSecurityError::Io {
            path: canonical.clone(),
            source,
        })?;
        if !preflight.is_file() {
            return Err(PathSecurityError::NotRegularFile(canonical));
        }

        let file = File::open(&canonical).map_err(|source| PathSecurityError::Io {
            path: canonical.clone(),
            source,
        })?;
        // Re-check on the handle itself: the path may have been replaced between
        // the pre-open inspection and the open, and only the handle's metadata
        // describes what was actually opened.
        let meta = file.metadata().map_err(|source| PathSecurityError::Io {
            path: canonical.clone(),
            source,
        })?;
        if !meta.is_file() {
            return Err(PathSecurityError::NotRegularFile(canonical));
        }
        let size = meta.len();
        if size > self.max_file_size {
            return Err(PathSecurityError::TooLarge {
                size,
                limit: self.max_file_size,
            });
        }
        Ok(OpenedFile {
            file,
            path: canonical,
            size,
        })
    }

    /// Applies the path-level checks and returns the canonical path.
    ///
    /// Checks run in this order, and the first failure is returned:
    /// empty path, interior NUL byte, `..` component (only with a base
    /// directory), symlink denial (only when symlinks are disallowed),
    /// canonicalization, and finally containment in the canonical base directory.
    fn resolve_path(&self, requested: &Path) -> Result<PathBuf, PathSecurityError> {
        if requested.as_os_str().is_empty() {
            return Err(PathSecurityError::EmptyPath);
        }
        if requested.as_os_str().as_encoded_bytes().contains(&0) {
            return Err(PathSecurityError::InteriorNul);
        }
        // Purely lexical: reject `..` up front so the containment decision never
        // depends on how the filesystem resolves a traversal.
        if self.base_dir.is_some()
            && requested
                .components()
                .any(|component| matches!(component, Component::ParentDir))
        {
            return Err(PathSecurityError::ParentTraversal(requested.to_path_buf()));
        }
        if !self.allow_symlinks {
            // `symlink_metadata` does not follow the final component, so this
            // reports whether `requested` itself is a link.
            let meta =
                std::fs::symlink_metadata(requested).map_err(|source| PathSecurityError::Io {
                    path: requested.to_path_buf(),
                    source,
                })?;
            if meta.file_type().is_symlink() {
                return Err(PathSecurityError::SymlinkDenied(requested.to_path_buf()));
            }
        }
        let canonical = requested
            .canonicalize()
            .map_err(|source| PathSecurityError::Io {
                path: requested.to_path_buf(),
                source,
            })?;
        if let Some(base) = &self.base_dir {
            // Canonicalize the base too, so both sides of the prefix comparison
            // are in the same resolved form.
            let canonical_base = base
                .canonicalize()
                .map_err(|source| PathSecurityError::Io {
                    path: base.clone(),
                    source,
                })?;
            if !canonical.starts_with(&canonical_base) {
                return Err(PathSecurityError::OutsideBase {
                    path: canonical,
                    base: canonical_base,
                });
            }
        }
        Ok(canonical)
    }

    /// Opens `requested` under the policy and reads its entire contents.
    ///
    /// The read itself is capped at the configured limit, independently of the
    /// size reported when the file was opened.
    ///
    /// # Errors
    ///
    /// Returns every error `open` can return, plus
    /// [`PathSecurityError::TooLarge`] or [`PathSecurityError::Io`] from the
    /// capped read. I/O errors from the read are reported against the file's
    /// canonical path.
    pub fn read(&self, requested: &Path) -> Result<Vec<u8>, PathSecurityError> {
        let OpenedFile { file, path, .. } = self.open(requested)?;
        read_capped(file, self.max_file_size).map_err(move |error| match error {
            // `read_capped` labels I/O errors with a placeholder path; swap in the real one.
            PathSecurityError::Io { source, .. } => PathSecurityError::Io { path, source },
            other => other,
        })
    }
}
/// Reads `reader` to the end, refusing to return more than `limit` bytes.
///
/// At most `limit + 1` bytes are pulled from the reader (saturating at
/// `u64::MAX`); the extra byte is what makes an oversize stream detectable.
///
/// # Errors
///
/// * [`PathSecurityError::Io`] with the placeholder path `<stream>` if reading fails.
/// * [`PathSecurityError::TooLarge`] if more than `limit` bytes were available.
pub fn read_capped<R: Read>(reader: R, limit: u64) -> Result<Vec<u8>, PathSecurityError> {
    let budget = limit.saturating_add(1);
    let mut contents = Vec::new();
    if let Err(source) = reader.take(budget).read_to_end(&mut contents) {
        return Err(PathSecurityError::Io {
            path: PathBuf::from("<stream>"),
            source,
        });
    }
    let size = u64::try_from(contents.len()).unwrap_or(u64::MAX);
    if size > limit {
        Err(PathSecurityError::TooLarge { size, limit })
    } else {
        Ok(contents)
    }
}
pub fn write_in_dir(dir: &Path, name: &str, bytes: &[u8]) -> Result<(), PathSecurityError> {
let handle = Dir::open_ambient_dir(dir, ambient_authority()).map_err(|source| {
PathSecurityError::Io {
path: dir.to_path_buf(),
source,
}
})?;
let mut file = handle
.create(name)
.map_err(|source| PathSecurityError::Io {
path: dir.join(name),
source,
})?;
file.write_all(bytes)
.map_err(|source| PathSecurityError::Io {
path: dir.join(name),
source,
})?;
Ok(())
}
/// Lexically joins `candidate` onto `base`, returning `None` if the result
/// could leave `base`.
///
/// The join never touches the filesystem. `.` components are dropped, a `..`
/// cancels the preceding normal component, and the candidate is rejected when
/// it contains a NUL byte, is rooted or prefixed, or has a `..` with nothing
/// left to cancel. An empty candidate yields `base` unchanged.
#[must_use]
pub fn safe_join(base: &Path, candidate: &str) -> Option<PathBuf> {
    if candidate.bytes().any(|byte| byte == 0) {
        return None;
    }

    // Collapse the candidate into the normal segments that survive `.` and `..`.
    let mut segments: Vec<&std::ffi::OsStr> = Vec::new();
    for part in Path::new(candidate).components() {
        match part {
            Component::Prefix(_) | Component::RootDir => return None,
            Component::CurDir => {}
            Component::ParentDir => {
                // A `..` with nothing to cancel would climb above `base`.
                segments.pop()?;
            }
            Component::Normal(name) => segments.push(name),
        }
    }

    let mut joined = PathBuf::from(base);
    joined.extend(segments);

    // Final guards: pushing a segment must not have replaced `base`, and the
    // result (including `base` itself) must be free of `..` components.
    let stays_inside = joined.starts_with(base);
    let has_parent = joined
        .components()
        .any(|part| part == Component::ParentDir);
    (stays_inside && !has_parent).then_some(joined)
}
// Unit tests for `PathPolicy`, `read_capped`, and `safe_join`.
// Filesystem-backed tests use throwaway directories from the `tempfile` crate.
#[cfg(test)]
mod tests {
use std::io::Write;
use super::*;
// Creates `name` inside `dir` with the given contents and returns its path.
fn write_temp(dir: &Path, name: &str, bytes: &[u8]) -> PathBuf {
let path = dir.join(name);
let mut f = File::create(&path).expect("create temp file");
f.write_all(bytes).expect("write temp file");
path
}
// Happy path: the default policy opens a regular file, reports its size and an
// absolute path, and reads back the exact bytes.
#[test]
fn opens_and_reads_a_regular_file() {
let dir = tempfile::tempdir().unwrap();
let path = write_temp(dir.path(), "hello.txt", b"hello");
let policy = PathPolicy::new();
let opened = policy.open(&path).expect("open should succeed");
assert_eq!(opened.size, 5);
assert!(opened.path.is_absolute());
let bytes = policy.read(&path).expect("read should succeed");
assert_eq!(bytes, b"hello");
}
// An empty path is rejected with `EmptyPath`.
#[test]
fn empty_path_is_rejected() {
let policy = PathPolicy::new();
let err = policy.open(Path::new("")).unwrap_err();
assert!(matches!(err, PathSecurityError::EmptyPath));
}
// A path that does not exist surfaces as an `Io` error.
#[test]
fn missing_file_is_io_error() {
let dir = tempfile::tempdir().unwrap();
let policy = PathPolicy::new();
let err = policy.open(&dir.path().join("nope")).unwrap_err();
assert!(matches!(err, PathSecurityError::Io { .. }), "got: {err:?}");
}
// Opening a directory must yield `NotRegularFile`.
#[test]
fn directory_is_not_a_regular_file() {
let dir = tempfile::tempdir().unwrap();
let policy = PathPolicy::new();
let err = policy.open(dir.path()).unwrap_err();
assert!(
matches!(err, PathSecurityError::NotRegularFile(_)),
"got: {err:?}"
);
}
// A 10-byte file against a 4-byte limit is rejected by `open` with `TooLarge`.
#[test]
fn oversize_file_is_rejected() {
let dir = tempfile::tempdir().unwrap();
let path = write_temp(dir.path(), "big.bin", b"0123456789");
let policy = PathPolicy::new().max_file_size(4);
let err = policy.open(&path).unwrap_err();
assert!(
matches!(err, PathSecurityError::TooLarge { limit: 4, .. }),
"got: {err:?}"
);
}
// The same oversize file is also rejected through `read`.
#[test]
fn read_is_capped() {
let dir = tempfile::tempdir().unwrap();
let path = write_temp(dir.path(), "data.bin", b"0123456789");
let policy = PathPolicy::new().max_file_size(4);
let err = policy.read(&path).unwrap_err();
assert!(matches!(err, PathSecurityError::TooLarge { .. }));
}
// `read_capped` returns the full contents when they fit within the limit.
#[test]
fn read_capped_accepts_within_limit() {
let bytes = read_capped(&b"hello"[..], 10).unwrap();
assert_eq!(bytes, b"hello");
}
// `read_capped` reports `TooLarge` when the stream exceeds the limit.
#[test]
fn read_capped_rejects_over_limit() {
let err = read_capped(&b"hello"[..], 3).unwrap_err();
assert!(matches!(err, PathSecurityError::TooLarge { limit: 3, .. }));
}
// A file inside the base directory opens, and its canonical path stays under
// the canonical base.
#[test]
fn base_dir_allows_contained_file() {
let dir = tempfile::tempdir().unwrap();
let path = write_temp(dir.path(), "inside.txt", b"ok");
let policy = PathPolicy::new().base_dir(dir.path());
let opened = policy.open(&path).expect("contained file should open");
assert!(opened.path.starts_with(dir.path().canonicalize().unwrap()));
}
// Reaching a sibling directory via `..` is rejected. Either rejection reason is
// accepted by the assertion.
#[test]
fn base_dir_rejects_parent_traversal() {
let base = tempfile::tempdir().unwrap();
let outside = tempfile::tempdir().unwrap();
let _secret = write_temp(outside.path(), "secret.txt", b"top secret");
let policy = PathPolicy::new().base_dir(base.path());
let traversal = base
.path()
.join("..")
.join(outside.path().file_name().unwrap());
let err = policy.open(&traversal.join("secret.txt")).unwrap_err();
assert!(
matches!(
err,
PathSecurityError::ParentTraversal(_) | PathSecurityError::OutsideBase { .. }
),
"got: {err:?}"
);
}
// A symlink located inside the base but pointing outside it is rejected as
// `OutsideBase` (symlinks are allowed by default, so canonicalization decides).
#[cfg(unix)]
#[test]
fn base_dir_rejects_symlink_escape() {
use std::os::unix::fs::symlink;
let base = tempfile::tempdir().unwrap();
let outside = tempfile::tempdir().unwrap();
let secret = write_temp(outside.path(), "secret.txt", b"top secret");
let link = base.path().join("link.txt");
symlink(&secret, &link).unwrap();
let policy = PathPolicy::new().base_dir(base.path());
let err = policy.open(&link).unwrap_err();
assert!(
matches!(err, PathSecurityError::OutsideBase { .. }),
"got: {err:?}"
);
}
// With symlinks disallowed, opening a link is rejected with `SymlinkDenied`
// even though its target is an ordinary file in the same directory.
#[cfg(unix)]
#[test]
fn symlinks_can_be_denied() {
use std::os::unix::fs::symlink;
let dir = tempfile::tempdir().unwrap();
let target = write_temp(dir.path(), "target.txt", b"data");
let link = dir.path().join("link.txt");
symlink(&target, &link).unwrap();
let policy = PathPolicy::new().allow_symlinks(false);
let err = policy.open(&link).unwrap_err();
assert!(
matches!(err, PathSecurityError::SymlinkDenied(_)),
"got: {err:?}"
);
}
// A path containing a NUL byte is rejected with `InteriorNul`.
#[cfg(unix)]
#[test]
fn interior_nul_is_rejected() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let policy = PathPolicy::new();
let path = Path::new(OsStr::from_bytes(b"a\0b"));
let err = policy.open(path).unwrap_err();
assert!(
matches!(err, PathSecurityError::InteriorNul),
"got: {err:?}"
);
}
// Fixed base path for the `safe_join` tests below.
const BASE: &str = "/var/lib/simdutf8-cli/data";
// Returns `BASE` as an owned `PathBuf`.
fn base() -> PathBuf {
PathBuf::from(BASE)
}
// Plain relative candidates (including `.` segments, dotfiles, and the empty
// string) are accepted and stay under the base with no `..` left over.
#[test]
fn safe_join_accepts_well_formed_relative_paths() {
for candidate in [
"advisory.json",
"2026/001/file.json",
"./a/./b.json",
".hidden",
"",
] {
let resolved = safe_join(&base(), candidate)
.unwrap_or_else(|| panic!("expected accept for {candidate:?}"));
assert!(resolved.starts_with(base()), "{candidate:?} escaped base");
assert!(!resolved
.components()
.any(|c| matches!(c, Component::ParentDir)));
}
}
// A `..` that only cancels an earlier segment is resolved lexically and accepted.
#[test]
fn safe_join_accepts_balanced_parent() {
assert_eq!(
safe_join(&base(), "a/../b.json"),
Some(base().join("b.json"))
);
assert_eq!(safe_join(&base(), "2026/.."), Some(base()));
}
// Escaping `..`, absolute paths, and NUL-containing candidates are all rejected.
#[test]
fn safe_join_rejects_traversal_and_absolute_and_nul() {
for candidate in [
"..",
"../etc/passwd",
"../../../../etc/passwd",
"2026/../../etc/passwd",
"/etc/passwd",
"advisory.json\0",
"a\0b",
] {
assert!(
safe_join(&base(), candidate).is_none(),
"expected reject for {candidate:?}"
);
}
}
// Leading `../` chains of every depth from 1 to 64 are rejected.
#[test]
fn safe_join_rejects_every_traversal_depth() {
for depth in 1..=64 {
let attack = "../".repeat(depth) + "etc/passwd";
assert!(
safe_join(&base(), &attack).is_none(),
"depth {depth} should be rejected"
);
}
}
}