1use std::fs::File;
31use std::io::{Read, Write};
32use std::path::{Component, Path, PathBuf};
33
34use cap_std::ambient_authority;
35use cap_std::fs::Dir;
36
/// Default ceiling, in bytes, on the size of a file that a policy will open
/// or read: 64 MiB.
pub const DEFAULT_MAX_FILE_SIZE: u64 = {
    // Name the unit so the magnitude is readable at a glance.
    const MIB: u64 = 1024 * 1024;
    64 * MIB
};
39
40#[derive(Debug, thiserror::Error)]
42pub enum PathSecurityError {
43 #[error("input path is empty")]
45 EmptyPath,
46
47 #[error("input path contains an interior NUL byte")]
49 InteriorNul,
50
51 #[error("input path contains a '..' component, which is not allowed with --base-dir: {}", .0.display())]
53 ParentTraversal(PathBuf),
54
55 #[error("resolved path {} escapes the permitted base directory {}", .path.display(), .base.display())]
57 OutsideBase {
58 path: PathBuf,
60 base: PathBuf,
62 },
63
64 #[error("symbolic links are not permitted: {}", .0.display())]
66 SymlinkDenied(PathBuf),
67
68 #[error("not a regular file: {}", .0.display())]
70 NotRegularFile(PathBuf),
71
72 #[error("input is too large: {size} bytes exceeds the {limit} byte limit")]
74 TooLarge {
75 size: u64,
77 limit: u64,
79 },
80
81 #[error("failed to access {}: {source}", .path.display())]
83 Io {
84 path: PathBuf,
86 #[source]
88 source: std::io::Error,
89 },
90}
91
/// A successfully opened file together with the facts established while
/// opening it.
#[derive(Debug)]
pub struct OpenedFile {
    /// The open handle to the file.
    pub file: File,
    /// The path the handle was opened from.
    pub path: PathBuf,
    /// Length of the file in bytes.
    pub size: u64,
}
102
/// Rules applied to a caller-supplied path before a file is opened.
#[derive(Clone, Debug)]
pub struct PathPolicy {
    /// When set, resolved paths must lie beneath this directory.
    base_dir: Option<PathBuf>,
    /// Whether a path that is itself a symbolic link is acceptable.
    allow_symlinks: bool,
    /// Largest acceptable file size, in bytes.
    max_file_size: u64,
}
113
114impl Default for PathPolicy {
115 fn default() -> Self {
116 Self::new()
117 }
118}
119
120impl PathPolicy {
121 #[must_use]
125 pub const fn new() -> Self {
126 Self {
127 base_dir: None,
128 allow_symlinks: true,
129 max_file_size: DEFAULT_MAX_FILE_SIZE,
130 }
131 }
132
133 #[must_use]
135 pub fn base_dir(mut self, base: impl Into<PathBuf>) -> Self {
136 self.base_dir = Some(base.into());
137 self
138 }
139
140 #[must_use]
142 pub const fn allow_symlinks(mut self, allow: bool) -> Self {
143 self.allow_symlinks = allow;
144 self
145 }
146
147 #[must_use]
149 pub const fn max_file_size(mut self, limit: u64) -> Self {
150 self.max_file_size = limit;
151 self
152 }
153
154 #[must_use]
156 pub const fn limit(&self) -> u64 {
157 self.max_file_size
158 }
159
160 pub fn open(&self, requested: &Path) -> Result<OpenedFile, PathSecurityError> {
169 let canonical = self.resolve_path(requested)?;
170
171 let file = File::open(&canonical).map_err(|source| PathSecurityError::Io {
175 path: canonical.clone(),
176 source,
177 })?;
178 let meta = file.metadata().map_err(|source| PathSecurityError::Io {
179 path: canonical.clone(),
180 source,
181 })?;
182 if !meta.is_file() {
183 return Err(PathSecurityError::NotRegularFile(canonical));
184 }
185
186 let size = meta.len();
188 if size > self.max_file_size {
189 return Err(PathSecurityError::TooLarge {
190 size,
191 limit: self.max_file_size,
192 });
193 }
194
195 Ok(OpenedFile {
196 file,
197 path: canonical,
198 size,
199 })
200 }
201
202 fn resolve_path(&self, requested: &Path) -> Result<PathBuf, PathSecurityError> {
210 if requested.as_os_str().is_empty() {
212 return Err(PathSecurityError::EmptyPath);
213 }
214
215 if requested.as_os_str().as_encoded_bytes().contains(&0) {
217 return Err(PathSecurityError::InteriorNul);
218 }
219
220 if self.base_dir.is_some()
223 && requested
224 .components()
225 .any(|component| matches!(component, Component::ParentDir))
226 {
227 return Err(PathSecurityError::ParentTraversal(requested.to_path_buf()));
228 }
229
230 if !self.allow_symlinks {
232 let meta =
233 std::fs::symlink_metadata(requested).map_err(|source| PathSecurityError::Io {
234 path: requested.to_path_buf(),
235 source,
236 })?;
237 if meta.file_type().is_symlink() {
238 return Err(PathSecurityError::SymlinkDenied(requested.to_path_buf()));
239 }
240 }
241
242 let canonical = requested
245 .canonicalize()
246 .map_err(|source| PathSecurityError::Io {
247 path: requested.to_path_buf(),
248 source,
249 })?;
250
251 if let Some(base) = &self.base_dir {
253 let canonical_base = base
254 .canonicalize()
255 .map_err(|source| PathSecurityError::Io {
256 path: base.clone(),
257 source,
258 })?;
259 if !canonical.starts_with(&canonical_base) {
260 return Err(PathSecurityError::OutsideBase {
261 path: canonical,
262 base: canonical_base,
263 });
264 }
265 }
266
267 Ok(canonical)
268 }
269
270 pub fn read(&self, requested: &Path) -> Result<Vec<u8>, PathSecurityError> {
277 let OpenedFile { file, path, .. } = self.open(requested)?;
278 read_capped(file, self.max_file_size).map_err(move |error| match error {
279 PathSecurityError::Io { source, .. } => PathSecurityError::Io { path, source },
281 other => other,
282 })
283 }
284}
285
286pub fn read_capped<R: Read>(reader: R, limit: u64) -> Result<Vec<u8>, PathSecurityError> {
296 let mut limited = reader.take(limit.saturating_add(1));
299 let mut buf = Vec::new();
300 limited
301 .read_to_end(&mut buf)
302 .map_err(|source| PathSecurityError::Io {
303 path: PathBuf::from("<stream>"),
304 source,
305 })?;
306
307 let len = u64::try_from(buf.len()).unwrap_or(u64::MAX);
308 if len > limit {
309 return Err(PathSecurityError::TooLarge { size: len, limit });
310 }
311 Ok(buf)
312}
313
314pub fn write_in_dir(dir: &Path, name: &str, bytes: &[u8]) -> Result<(), PathSecurityError> {
327 let handle = Dir::open_ambient_dir(dir, ambient_authority()).map_err(|source| {
328 PathSecurityError::Io {
329 path: dir.to_path_buf(),
330 source,
331 }
332 })?;
333 let mut file = handle
334 .create(name)
335 .map_err(|source| PathSecurityError::Io {
336 path: dir.join(name),
337 source,
338 })?;
339 file.write_all(bytes)
340 .map_err(|source| PathSecurityError::Io {
341 path: dir.join(name),
342 source,
343 })?;
344 Ok(())
345}
346
/// Lexically joins `candidate` onto `base`, returning `None` if the result
/// could leave `base`.
///
/// No filesystem access is performed. `.` components are dropped, and a `..`
/// cancels the preceding normal component. The candidate is rejected when:
///
/// * it contains a NUL byte,
/// * it has a root or prefix component (i.e. it is not purely relative),
/// * a `..` would climb above the starting point, or
/// * the joined path does not start with `base` or still contains a `..`
///   component (which includes the case where `base` itself contains one).
///
/// An empty candidate yields `base` unchanged.
#[must_use]
pub fn safe_join(base: &Path, candidate: &str) -> Option<PathBuf> {
    if candidate.bytes().any(|byte| byte == 0) {
        return None;
    }

    // Normal components seen so far, with `..` already applied.
    let mut segments: Vec<&std::ffi::OsStr> = Vec::new();
    for part in Path::new(candidate).components() {
        match part {
            Component::Normal(name) => segments.push(name),
            Component::CurDir => {}
            Component::ParentDir => {
                // Nothing left to cancel means the candidate climbs out.
                segments.pop()?;
            }
            Component::Prefix(_) | Component::RootDir => return None,
        }
    }

    let mut joined = PathBuf::from(base);
    joined.extend(segments);

    // Belt and braces: re-verify the finished path rather than trusting the
    // bookkeeping above.
    let contained = joined.starts_with(base);
    let has_parent = joined
        .components()
        .any(|part| part == Component::ParentDir);
    (contained && !has_parent).then_some(joined)
}
398
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;

    /// Creates `name` inside `dir` containing `bytes` and returns its path.
    fn write_temp(dir: &Path, name: &str, bytes: &[u8]) -> PathBuf {
        let target = dir.join(name);
        let mut handle = File::create(&target).expect("create temp file");
        handle.write_all(bytes).expect("write temp file");
        target
    }

    /// A plain file opens, reports its size, and reads back its contents.
    #[test]
    fn opens_and_reads_a_regular_file() {
        let scratch = tempfile::tempdir().unwrap();
        let file_path = write_temp(scratch.path(), "hello.txt", b"hello");
        let policy = PathPolicy::new();

        let opened = policy.open(&file_path).expect("open should succeed");
        assert_eq!(opened.size, 5);
        assert!(opened.path.is_absolute());

        let contents = policy.read(&file_path).expect("read should succeed");
        assert_eq!(contents, b"hello");
    }

    /// The empty path is refused before any filesystem access.
    #[test]
    fn empty_path_is_rejected() {
        let outcome = PathPolicy::new().open(Path::new(""));
        assert!(matches!(outcome.unwrap_err(), PathSecurityError::EmptyPath));
    }

    /// A path that does not exist surfaces as an I/O error.
    #[test]
    fn missing_file_is_io_error() {
        let scratch = tempfile::tempdir().unwrap();
        let absent = scratch.path().join("nope");
        let err = PathPolicy::new().open(&absent).unwrap_err();
        assert!(matches!(err, PathSecurityError::Io { .. }), "got: {err:?}");
    }

    /// Directories are not accepted as input files.
    #[test]
    fn directory_is_not_a_regular_file() {
        let scratch = tempfile::tempdir().unwrap();
        let err = PathPolicy::new().open(scratch.path()).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::NotRegularFile(_)),
            "got: {err:?}"
        );
    }

    /// `open` rejects a file longer than the configured limit.
    #[test]
    fn oversize_file_is_rejected() {
        let scratch = tempfile::tempdir().unwrap();
        let file_path = write_temp(scratch.path(), "big.bin", b"0123456789");
        let tight = PathPolicy::new().max_file_size(4);
        let err = tight.open(&file_path).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::TooLarge { limit: 4, .. }),
            "got: {err:?}"
        );
    }

    /// `read` enforces the same size limit as `open`.
    #[test]
    fn read_is_capped() {
        let scratch = tempfile::tempdir().unwrap();
        let file_path = write_temp(scratch.path(), "data.bin", b"0123456789");
        let tight = PathPolicy::new().max_file_size(4);
        let err = tight.read(&file_path).unwrap_err();
        assert!(matches!(err, PathSecurityError::TooLarge { .. }));
    }

    /// A stream shorter than the limit is returned whole.
    #[test]
    fn read_capped_accepts_within_limit() {
        let contents = read_capped(&b"hello"[..], 10).unwrap();
        assert_eq!(contents, b"hello");
    }

    /// A stream longer than the limit is refused.
    #[test]
    fn read_capped_rejects_over_limit() {
        let err = read_capped(&b"hello"[..], 3).unwrap_err();
        assert!(matches!(err, PathSecurityError::TooLarge { limit: 3, .. }));
    }

    /// A file inside the base directory opens normally.
    #[test]
    fn base_dir_allows_contained_file() {
        let root = tempfile::tempdir().unwrap();
        let file_path = write_temp(root.path(), "inside.txt", b"ok");
        let confined = PathPolicy::new().base_dir(root.path());
        let opened = confined
            .open(&file_path)
            .expect("contained file should open");
        assert!(opened.path.starts_with(root.path().canonicalize().unwrap()));
    }

    /// `<base>/../<sibling>/secret.txt` must not be reachable.
    #[test]
    fn base_dir_rejects_parent_traversal() {
        let root = tempfile::tempdir().unwrap();
        let sibling = tempfile::tempdir().unwrap();
        let _secret = write_temp(sibling.path(), "secret.txt", b"top secret");

        let confined = PathPolicy::new().base_dir(root.path());
        let sibling_name = sibling.path().file_name().unwrap();
        let attack = root
            .path()
            .join("..")
            .join(sibling_name)
            .join("secret.txt");

        let err = confined.open(&attack).unwrap_err();
        assert!(
            matches!(
                err,
                PathSecurityError::ParentTraversal(_) | PathSecurityError::OutsideBase { .. }
            ),
            "got: {err:?}"
        );
    }

    /// A symlink inside the base that points outside it is caught after
    /// canonicalisation.
    #[cfg(unix)]
    #[test]
    fn base_dir_rejects_symlink_escape() {
        use std::os::unix::fs::symlink;

        let root = tempfile::tempdir().unwrap();
        let sibling = tempfile::tempdir().unwrap();
        let secret = write_temp(sibling.path(), "secret.txt", b"top secret");

        let link = root.path().join("link.txt");
        symlink(&secret, &link).unwrap();

        let confined = PathPolicy::new().base_dir(root.path());
        let err = confined.open(&link).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::OutsideBase { .. }),
            "got: {err:?}"
        );
    }

    /// With symlinks disabled, a symlinked input is refused even when its
    /// target is harmless.
    #[cfg(unix)]
    #[test]
    fn symlinks_can_be_denied() {
        use std::os::unix::fs::symlink;

        let scratch = tempfile::tempdir().unwrap();
        let target = write_temp(scratch.path(), "target.txt", b"data");
        let link = scratch.path().join("link.txt");
        symlink(&target, &link).unwrap();

        let strict = PathPolicy::new().allow_symlinks(false);
        let err = strict.open(&link).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::SymlinkDenied(_)),
            "got: {err:?}"
        );
    }

    /// A NUL byte inside the path is rejected with its own error variant.
    #[cfg(unix)]
    #[test]
    fn interior_nul_is_rejected() {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        let poisoned = Path::new(OsStr::from_bytes(b"a\0b"));
        let err = PathPolicy::new().open(poisoned).unwrap_err();
        assert!(
            matches!(err, PathSecurityError::InteriorNul),
            "got: {err:?}"
        );
    }

    /// Base used by the `safe_join` tests. `safe_join` is purely lexical, so
    /// this directory does not need to exist.
    const BASE: &str = "/var/lib/simdutf8-cli/data";

    /// [`BASE`] as an owned path.
    fn base() -> PathBuf {
        PathBuf::from(BASE)
    }

    /// Ordinary relative candidates, including the empty string, stay under
    /// the base.
    #[test]
    fn safe_join_accepts_well_formed_relative_paths() {
        let accepted = [
            "advisory.json",
            "2026/001/file.json",
            "./a/./b.json",
            ".hidden",
            "",
        ];
        for candidate in accepted {
            let resolved = safe_join(&base(), candidate)
                .unwrap_or_else(|| panic!("expected accept for {candidate:?}"));
            assert!(resolved.starts_with(base()), "{candidate:?} escaped base");
            let has_parent = resolved
                .components()
                .any(|c| matches!(c, Component::ParentDir));
            assert!(!has_parent);
        }
    }

    /// A `..` that only cancels an earlier component is allowed.
    #[test]
    fn safe_join_accepts_balanced_parent() {
        let expected_file = base().join("b.json");
        assert_eq!(safe_join(&base(), "a/../b.json"), Some(expected_file));
        assert_eq!(safe_join(&base(), "2026/.."), Some(base()));
    }

    /// Escaping `..`, absolute paths, and NUL bytes are all refused.
    #[test]
    fn safe_join_rejects_traversal_and_absolute_and_nul() {
        let rejected = [
            "..",
            "../etc/passwd",
            "../../../../etc/passwd",
            "2026/../../etc/passwd",
            "/etc/passwd",
            "advisory.json\0",
            "a\0b",
        ];
        for candidate in rejected {
            assert!(
                safe_join(&base(), candidate).is_none(),
                "expected reject for {candidate:?}"
            );
        }
    }

    /// No number of leading `../` segments gets through.
    #[test]
    fn safe_join_rejects_every_traversal_depth() {
        for depth in 1..=64 {
            let attack = format!("{}etc/passwd", "../".repeat(depth));
            assert!(
                safe_join(&base(), &attack).is_none(),
                "depth {depth} should be rejected"
            );
        }
    }
}