use std::borrow::{Borrow, Cow};
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::path::{Path, PathBuf};
use crate::BaleError;
/// Path components beginning with this prefix are rejected with
/// `BaleError::ReservedPath` when a path is normalized.
const RESERVED_PREFIX: &str = ".bale";
/// A path stored as raw bytes that are either borrowed or owned.
///
/// The bytes are not guaranteed to be valid UTF-8 or normalized: the byte
/// constructors (`from_bytes`, `From<Vec<u8>>`, `From<&[u8]>`) store their
/// input unchanged. The `TryFrom` string/path conversions and `normalize`
/// go through normalization and validation first.
///
/// Equality, ordering, and hashing are derived and therefore compare the
/// underlying bytes.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ArchivePath<'a>(Cow<'a, [u8]>);
impl<'a> ArchivePath<'a> {
    /// Returns the path as a string slice, or `None` if the bytes are not
    /// valid UTF-8.
    #[must_use]
    pub fn as_str(&self) -> Option<&str> {
        self.to_str_checked().ok()
    }

    /// Returns the raw bytes of the path.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Returns the length of the path in bytes (not characters).
    #[must_use]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if the path holds zero bytes.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Borrows `bytes` as a path without copying, validating, or normalizing.
    #[must_use]
    pub fn from_bytes(bytes: &'a [u8]) -> Self {
        Self(Cow::Borrowed(bytes))
    }

    /// Borrows `bytes` up to, but not including, the first NUL byte.
    ///
    /// When no NUL byte is present the whole slice is used. No validation or
    /// normalization is performed.
    #[must_use]
    pub fn from_null_padded_bytes(bytes: &'a [u8]) -> Self {
        let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
        Self(Cow::Borrowed(&bytes[..end]))
    }

    /// Returns the path as a string slice.
    ///
    /// # Errors
    ///
    /// Returns the `BaleError` converted from `std::str::Utf8Error` when the
    /// bytes are not valid UTF-8.
    pub fn to_str_checked(&self) -> Result<&str, BaleError> {
        Ok(std::str::from_utf8(&self.0)?)
    }

    /// Returns the bytes after the last `/`, or the whole path when it
    /// contains no `/`.
    ///
    /// A path ending in `/` yields an empty slice.
    #[must_use]
    pub fn file_name(&self) -> &[u8] {
        self.0
            .iter()
            .rposition(|&b| b == b'/')
            .map_or(&self.0[..], |pos| &self.0[pos + 1..])
    }

    /// Returns [`file_name`](Self::file_name) as a string slice.
    ///
    /// # Errors
    ///
    /// Returns an error when the file name is not valid UTF-8.
    pub fn file_name_str(&self) -> Result<&str, BaleError> {
        Ok(std::str::from_utf8(self.file_name())?)
    }

    /// Returns a copy of this path with `(number)` inserted before the file
    /// extension, or appended when the file name has no extension.
    ///
    /// Only the final path component is inspected, so dots in directory
    /// names are ignored. A dot in the first position of the file name marks
    /// a hidden file rather than an extension, matching the convention of
    /// `std::path::Path::extension`: `.gitignore` becomes `.gitignore(1)`,
    /// while `.config.toml` becomes `.config(1).toml`.
    ///
    /// The result is not re-validated or re-normalized.
    ///
    /// # Errors
    ///
    /// Returns an error when the path is not valid UTF-8.
    pub fn with_suffix(&self, number: usize) -> Result<ArchivePath<'static>, BaleError> {
        let path = self.to_str_checked()?;
        let filename_start = path.rfind('/').map_or(0, |pos| pos + 1);
        let (dir, filename) = path.split_at(filename_start);

        // A dot at index 0 is the hidden-file marker, not an extension
        // separator; splitting there would move the counter in front of the
        // name and drop the leading dot from the start of the file name.
        let extension_dot = filename.rfind('.').filter(|&dot_pos| dot_pos > 0);

        let suffixed = match extension_dot {
            Some(dot_pos) => {
                let (stem, ext) = filename.split_at(dot_pos);
                format!("{dir}{stem}({number}){ext}")
            }
            None => format!("{path}({number})"),
        };
        Ok(ArchivePath(Cow::Owned(suffixed.into_bytes())))
    }

    /// Converts the path into one that owns its bytes, copying them if they
    /// were borrowed.
    #[must_use]
    pub fn into_owned(self) -> ArchivePath<'static> {
        ArchivePath(Cow::Owned(self.0.into_owned()))
    }

    /// Returns a normalized, validated, owned copy of this path.
    ///
    /// # Errors
    ///
    /// Returns `BaleError::InvalidPath` when the bytes are not valid UTF-8,
    /// and otherwise any error produced while normalizing and validating the
    /// path (see `normalize_bytes`).
    pub fn normalize(&self) -> Result<ArchivePath<'static>, BaleError> {
        let s = self.as_str().ok_or(BaleError::InvalidPath)?;
        Ok(ArchivePath(Cow::Owned(
            Self::normalize_bytes(s)?.into_owned(),
        )))
    }

    /// Consuming variant of [`normalize`](Self::normalize).
    ///
    /// # Errors
    ///
    /// Same as [`normalize`](Self::normalize).
    pub fn into_normalized(self) -> Result<ArchivePath<'static>, BaleError> {
        self.normalize()
    }

    /// Normalizes and validates `path`, borrowing from the input when no
    /// rewriting is needed.
    ///
    /// Steps, in order:
    /// 1. Surrounding whitespace of the whole path is trimmed.
    /// 2. If the trimmed path is already normalized it is validated and
    ///    returned as a borrow.
    /// 3. Otherwise the path is split on both `/` and `\`; empty and `.`
    ///    components are dropped, and `..` removes the preceding component.
    /// 4. Every remaining component is validated and the components are
    ///    re-joined with `/`.
    ///
    /// # Errors
    ///
    /// Returns `BaleError::InvalidPath` when `..` would climb above the
    /// root, when a component consists only of whitespace, or when no
    /// component remains. Also returns any error raised by
    /// `safename::validate_file`, and `BaleError::ReservedPath` for
    /// components starting with the reserved prefix.
    fn normalize_bytes(path: &str) -> Result<Cow<'_, [u8]>, BaleError> {
        let trimmed = path.trim();

        // Fast path: nothing to rewrite, so validate and hand back a borrow.
        if Self::is_normalized(trimmed) {
            Self::validate_safename(trimmed)?;
            return Ok(Cow::Borrowed(trimmed.as_bytes()));
        }

        let mut components: Vec<&str> = Vec::new();
        for part in trimmed.split(['/', '\\']) {
            match part {
                // Repeated, leading, or trailing separators and `.` are no-ops.
                "" | "." => {}
                ".." => {
                    // Popping from an empty stack means escaping the root.
                    if components.pop().is_none() {
                        return Err(BaleError::InvalidPath);
                    }
                }
                component if component.trim().is_empty() => {
                    return Err(BaleError::InvalidPath);
                }
                component => components.push(component),
            }
        }

        if components.is_empty() {
            return Err(BaleError::InvalidPath);
        }
        // Validate only the components that survive `..` resolution.
        for component in &components {
            Self::validate_component(component)?;
        }
        Ok(Cow::Owned(components.join("/").into_bytes()))
    }

    /// Validates every `/`-separated component of an already normalized path.
    fn validate_safename(path: &str) -> Result<(), BaleError> {
        for component in path.split('/') {
            Self::validate_component(component)?;
        }
        Ok(())
    }

    /// Validates one component: first with `safename::validate_file`, then
    /// against the reserved prefix.
    fn validate_component(component: &str) -> Result<(), BaleError> {
        safename::validate_file(component)?;
        Self::check_reserved(component)
    }

    /// Rejects components that start with `RESERVED_PREFIX`, reporting the
    /// offending component in `BaleError::ReservedPath`.
    fn check_reserved(component: &str) -> Result<(), BaleError> {
        if component.starts_with(RESERVED_PREFIX) {
            return Err(BaleError::ReservedPath(component.to_string()));
        }
        Ok(())
    }

    /// Returns `true` when `path` needs no rewriting: it contains no
    /// backslash, and none of its `/`-separated components is empty,
    /// whitespace-only, `.`, or `..`.
    ///
    /// The empty string and paths with a leading or trailing `/` are covered
    /// by the component check, because splitting them yields an empty
    /// component.
    fn is_normalized(path: &str) -> bool {
        !path.contains('\\')
            && path.split('/').all(|component| {
                !component.trim().is_empty() && component != "." && component != ".."
            })
    }
}
// NOTE(review): the allow presumably covers `String::from_utf8_lossy`, which
// is used here so that displaying a non-UTF-8 path cannot fail — confirm
// against the project's clippy configuration.
#[allow(clippy::disallowed_methods)]
impl fmt::Display for ArchivePath<'_> {
    /// Writes the path as text, replacing invalid UTF-8 sequences with
    /// U+FFFD. Width and alignment flags of the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lossy = String::from_utf8_lossy(&self.0);
        f.write_str(&lossy)
    }
}
impl AsRef<[u8]> for ArchivePath<'_> {
fn as_ref(&self) -> &[u8] {
&self.0
}
}
impl Borrow<[u8]> for ArchivePath<'_> {
    /// Borrows the raw path bytes, which lets byte slices be used as lookup
    /// keys in maps keyed by `ArchivePath`.
    fn borrow(&self) -> &[u8] {
        let bytes: &[u8] = &self.0;
        bytes
    }
}
impl From<Vec<u8>> for ArchivePath<'static> {
fn from(bytes: Vec<u8>) -> Self {
Self(Cow::Owned(bytes))
}
}
impl<'a> From<&'a [u8]> for ArchivePath<'a> {
fn from(bytes: &'a [u8]) -> Self {
Self::from_bytes(bytes)
}
}
impl From<ArchivePath<'_>> for Vec<u8> {
fn from(path: ArchivePath<'_>) -> Self {
path.0.into_owned()
}
}
impl TryFrom<&str> for ArchivePath<'static> {
type Error = BaleError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
Ok(Self(Cow::Owned(Self::normalize_bytes(s)?.into_owned())))
}
}
impl TryFrom<String> for ArchivePath<'static> {
type Error = BaleError;
fn try_from(s: String) -> Result<Self, Self::Error> {
Ok(Self(Cow::Owned(Self::normalize_bytes(&s)?.into_owned())))
}
}
impl TryFrom<&Path> for ArchivePath<'static> {
type Error = BaleError;
fn try_from(path: &Path) -> Result<Self, Self::Error> {
let s = path.to_str().ok_or(BaleError::InvalidPath)?;
Ok(Self(Cow::Owned(Self::normalize_bytes(s)?.into_owned())))
}
}
impl TryFrom<PathBuf> for ArchivePath<'static> {
type Error = BaleError;
fn try_from(path: PathBuf) -> Result<Self, Self::Error> {
ArchivePath::try_from(path.as_path())
}
}
impl TryFrom<&OsStr> for ArchivePath<'static> {
type Error = BaleError;
fn try_from(s: &OsStr) -> Result<Self, Self::Error> {
ArchivePath::try_from(Path::new(s))
}
}
impl TryFrom<OsString> for ArchivePath<'static> {
type Error = BaleError;
fn try_from(s: OsString) -> Result<Self, Self::Error> {
ArchivePath::try_from(s.as_os_str())
}
}
// Unit and property tests for `ArchivePath`: normalization rules, raw byte
// constructors, trait implementations, component validation, and the
// reserved-prefix check.
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
use crate::proptest_config;
// --- Normalization: separators and `..` resolution ---
#[test]
fn backslashes_converted() {
let path = ArchivePath::try_from("foo\\bar\\baz").unwrap();
assert_eq!(path.as_str(), Some("foo/bar/baz"));
}
#[test]
fn dotdot_resolves_parent() {
let path = ArchivePath::try_from("foo/bar/../baz").unwrap();
assert_eq!(path.as_str(), Some("foo/baz"));
}
#[test]
fn mixed_slashes_with_dotdot() {
let path = ArchivePath::try_from("foo/bar\\baz/../qux").unwrap();
assert_eq!(path.as_str(), Some("foo/bar/qux"));
}
// --- Raw byte constructors: bytes are stored as given ---
#[test]
fn from_raw_bytes() {
let path = ArchivePath::from(b"foo/bar".to_vec());
assert_eq!(path.as_str(), Some("foo/bar"));
assert_eq!(path.as_bytes(), b"foo/bar");
}
// Pointer equality proves the constructor borrows instead of copying.
#[test]
fn from_bytes_borrows() {
let bytes = b"foo/bar";
let path = ArchivePath::from_bytes(bytes);
assert!(std::ptr::eq(path.as_bytes().as_ptr(), bytes.as_ptr()));
}
#[test]
fn from_slice_borrows() {
let bytes: &[u8] = b"foo/bar";
let path = ArchivePath::from(bytes);
assert!(std::ptr::eq(path.as_bytes().as_ptr(), bytes.as_ptr()));
}
// Pointer inequality proves `into_owned` copied the borrowed bytes.
#[test]
fn into_owned_works() {
let bytes = b"foo/bar";
let borrowed = ArchivePath::from_bytes(bytes);
let owned = borrowed.into_owned();
assert_eq!(owned.as_str(), Some("foo/bar"));
assert!(!std::ptr::eq(owned.as_bytes().as_ptr(), bytes.as_ptr()));
}
// A raw path differs from its normalized form until `into_normalized` runs.
#[test]
fn into_normalized_works() {
let raw = ArchivePath::from(b"foo\\bar/../baz".to_vec());
let user = ArchivePath::try_from("foo/baz").unwrap();
assert_ne!(raw, user);
assert_eq!(raw.into_normalized().unwrap(), user);
}
// --- `is_normalized` fast-path predicate ---
#[test]
fn is_normalized_detects_normalized() {
assert!(ArchivePath::is_normalized("foo"));
assert!(ArchivePath::is_normalized("foo/bar"));
assert!(ArchivePath::is_normalized("foo/bar/baz"));
assert!(ArchivePath::is_normalized("a/b/c/d/e"));
}
#[test]
fn is_normalized_detects_unnormalized() {
assert!(!ArchivePath::is_normalized("foo\\bar"));
assert!(!ArchivePath::is_normalized("/foo"));
assert!(!ArchivePath::is_normalized("foo/"));
assert!(!ArchivePath::is_normalized("foo//bar"));
assert!(!ArchivePath::is_normalized("foo/./bar"));
assert!(!ArchivePath::is_normalized("foo/../bar"));
assert!(!ArchivePath::is_normalized(""));
assert!(!ArchivePath::is_normalized("foo/ /bar"));
assert!(!ArchivePath::is_normalized("foo/\t/bar"));
assert!(!ArchivePath::is_normalized(" "));
}
// --- `normalize_bytes`: borrow when unchanged, allocate when rewritten ---
#[test]
fn normalize_bytes_borrows_when_unchanged() {
let input = "foo/bar/baz";
let result = ArchivePath::normalize_bytes(input).unwrap();
assert!(matches!(result, Cow::Borrowed(_)));
assert!(std::ptr::eq(result.as_ref().as_ptr(), input.as_ptr()));
}
#[test]
fn normalize_bytes_copies_when_changed() {
let result = ArchivePath::normalize_bytes("foo\\bar").unwrap();
assert!(matches!(result, Cow::Owned(_)));
assert_eq!(result.as_ref(), b"foo/bar");
}
// --- Whitespace handling ---
#[test]
fn whitespace_only_components_rejected() {
assert!(ArchivePath::try_from("foo/ /bar").is_err());
assert!(ArchivePath::try_from("foo/\t/bar").is_err());
// NOTE(review): this input is identical to the first assertion above;
// possibly a different whitespace pattern was intended — confirm.
assert!(ArchivePath::try_from("foo/ /bar").is_err());
}
// Whitespace at the edge of an inner component is an error, whitespace
// around the whole path is accepted (the input is trimmed before
// normalization), and spaces inside a component are accepted.
#[test]
fn leading_trailing_whitespace_rejected() {
assert!(ArchivePath::try_from("foo/ bar").is_err()); assert!(ArchivePath::try_from("foo/bar /baz").is_err()); assert!(ArchivePath::try_from(" foo/bar").is_ok()); assert!(ArchivePath::try_from("foo/bar ").is_ok()); assert!(ArchivePath::try_from("foo/bar baz/qux").is_ok());
}
// --- Invalid UTF-8 ---
#[test]
fn into_normalized_rejects_invalid_utf8() {
let path = ArchivePath::from(vec![0xFF, 0xFE]);
assert!(path.into_normalized().is_err());
}
// `Display` is lossy: the invalid byte shows up as U+FFFD.
#[test]
fn invalid_utf8_display() {
let path = ArchivePath::from(vec![0x66, 0x6F, 0x6F, 0xFF, 0x62, 0x61, 0x72]);
assert_eq!(path.as_str(), None);
assert!(path.to_string().contains('\u{FFFD}'));
}
// --- Derived traits: Hash, Eq, Ord ---
#[test]
fn hashable() {
use std::collections::HashSet;
let mut set = HashSet::new();
set.insert(ArchivePath::try_from("foo/bar").unwrap());
set.insert(ArchivePath::try_from("foo/bar").unwrap());
set.insert(ArchivePath::try_from("baz/qux").unwrap());
assert_eq!(set.len(), 2);
}
// Relies on `Borrow<[u8]>` so a byte slice can be used as the lookup key.
#[test]
fn hashmap_lookup_by_bytes() {
use std::collections::HashMap;
let mut map: HashMap<ArchivePath<'static>, u32> = HashMap::new();
map.insert(ArchivePath::try_from("foo/bar").unwrap(), 42);
map.insert(ArchivePath::try_from("baz/qux").unwrap(), 99);
assert_eq!(map.get(b"foo/bar".as_slice()), Some(&42));
assert_eq!(map.get(b"baz/qux".as_slice()), Some(&99));
assert_eq!(map.get(b"nonexistent".as_slice()), None);
}
#[test]
fn orderable() {
let mut paths = [
ArchivePath::try_from("z/file").unwrap(),
ArchivePath::try_from("a/file").unwrap(),
ArchivePath::try_from("m/file").unwrap(),
];
paths.sort();
assert_eq!(paths[0].as_str(), Some("a/file"));
assert_eq!(paths[1].as_str(), Some("m/file"));
assert_eq!(paths[2].as_str(), Some("z/file"));
}
// --- Accessors and simple conversions ---
#[test]
fn len_returns_byte_count() {
let path = ArchivePath::try_from("foo/bar").unwrap();
assert_eq!(path.len(), 7);
assert_eq!(path.as_bytes().len(), path.len());
}
// An empty path can only be built through a raw constructor; the
// `TryFrom` conversions reject empty input (see `empty_paths_rejected`).
#[test]
fn is_empty_works() {
let empty = ArchivePath::from_bytes(b"");
assert!(empty.is_empty());
assert_eq!(empty.len(), 0);
let nonempty = ArchivePath::try_from("foo").unwrap();
assert!(!nonempty.is_empty());
}
#[test]
fn as_bytes_returns_underlying() {
let path = ArchivePath::try_from("foo/bar").unwrap();
assert_eq!(path.as_bytes(), b"foo/bar");
}
#[test]
fn asref_bytes() {
let path = ArchivePath::try_from("foo/bar").unwrap();
let bytes: &[u8] = path.as_ref();
assert_eq!(bytes, b"foo/bar");
}
#[test]
fn borrow_bytes() {
use std::borrow::Borrow;
let path = ArchivePath::try_from("foo/bar").unwrap();
let bytes: &[u8] = path.borrow();
assert_eq!(bytes, b"foo/bar");
}
#[test]
fn from_vec() {
let path = ArchivePath::from(b"foo/bar".to_vec());
assert_eq!(path.as_bytes(), b"foo/bar");
}
#[test]
fn into_vec() {
let path = ArchivePath::try_from("foo/bar").unwrap();
let vec: Vec<u8> = path.into();
assert_eq!(vec, b"foo/bar");
}
#[test]
fn display_valid_utf8() {
let path = ArchivePath::try_from("foo/bar").unwrap();
assert_eq!(format!("{}", path), "foo/bar");
}
#[test]
fn tryfrom_string() {
let path = ArchivePath::try_from(String::from("foo/bar")).unwrap();
assert_eq!(path.as_str(), Some("foo/bar"));
}
#[test]
fn tryfrom_pathbuf() {
let path = ArchivePath::try_from(PathBuf::from("foo/bar")).unwrap();
assert_eq!(path.as_str(), Some("foo/bar"));
}
#[test]
fn tryfrom_osstring() {
let path = ArchivePath::try_from(OsString::from("foo/bar")).unwrap();
assert_eq!(path.as_str(), Some("foo/bar"));
}
#[test]
fn normalize_method() {
let path = ArchivePath::from_bytes(b"foo\\bar");
let normalized = path.normalize().unwrap();
assert_eq!(normalized.as_str(), Some("foo/bar"));
}
// --- Component validation (delegated to `safename::validate_file`) ---
#[test]
fn safename_leading_dash_rejected() {
assert!(ArchivePath::try_from("-rf").is_err());
assert!(ArchivePath::try_from("foo/-bar").is_err());
assert!(ArchivePath::try_from("--help").is_err());
}
#[test]
fn safename_leading_tilde_rejected() {
assert!(ArchivePath::try_from("~root").is_err());
assert!(ArchivePath::try_from("foo/~evil").is_err());
}
#[test]
fn safename_control_chars_rejected() {
assert!(ArchivePath::try_from("foo\x00bar").is_err());
assert!(ArchivePath::try_from("foo\x1Fbar").is_err());
assert!(ArchivePath::try_from("foo\x7Fbar").is_err());
}
// 0xFF is never valid UTF-8, so `normalize` fails before component
// validation is reached.
#[test]
fn safename_xff_rejected() {
let path = ArchivePath::from(vec![0x66, 0x6F, 0x6F, 0xFF]); assert!(path.normalize().is_err());
}
#[test]
fn safename_colon_rejected() {
assert!(ArchivePath::try_from("foo:bar").is_err());
}
#[test]
fn safename_shell_metacharacters_rejected() {
assert!(ArchivePath::try_from("foo'bar").is_err());
assert!(ArchivePath::try_from("foo\"bar").is_err());
assert!(ArchivePath::try_from("foo;bar").is_err());
assert!(ArchivePath::try_from("foo|bar").is_err());
assert!(ArchivePath::try_from("foo&bar").is_err());
assert!(ArchivePath::try_from("foo>bar").is_err());
assert!(ArchivePath::try_from("foo<bar").is_err());
assert!(ArchivePath::try_from("foo`bar").is_err());
assert!(ArchivePath::try_from("foo$bar").is_err());
}
#[test]
fn safename_valid_accepted() {
assert!(ArchivePath::try_from("normal_file.txt").is_ok());
assert!(ArchivePath::try_from("foo/bar/baz.rs").is_ok());
assert!(ArchivePath::try_from("file-with-dashes.txt").is_ok()); assert!(ArchivePath::try_from("file_with_underscores").is_ok());
assert!(ArchivePath::try_from("CamelCase.TXT").is_ok());
assert!(ArchivePath::try_from("123numeric").is_ok());
}
// --- Reserved prefix: any component starting with ".bale" is rejected ---
#[test]
fn reserved_bale_prefix_rejected() {
assert!(ArchivePath::try_from(".bale").is_err());
assert!(ArchivePath::try_from(".bale.txt").is_err());
assert!(ArchivePath::try_from(".bale/file.txt").is_err());
assert!(ArchivePath::try_from("foo/.bale").is_err());
assert!(ArchivePath::try_from("foo/.bale/bar").is_err());
assert!(ArchivePath::try_from(".balesomething").is_err());
assert!(ArchivePath::try_from("dir/.baledata").is_err());
}
// The check is a prefix match on the component, so names that merely
// contain or resemble "bale" pass.
#[test]
fn similar_to_reserved_accepted() {
assert!(ArchivePath::try_from(".bal").is_ok());
assert!(ArchivePath::try_from("bale").is_ok());
assert!(ArchivePath::try_from("bale.txt").is_ok());
assert!(ArchivePath::try_from("foo.bale").is_ok());
assert!(ArchivePath::try_from("mybale").is_ok());
}
// The error carries the offending component.
#[test]
fn reserved_path_error_type() {
let result = ArchivePath::try_from(".bale");
assert!(matches!(result, Err(BaleError::ReservedPath(_))));
if let Err(BaleError::ReservedPath(path)) = result {
assert_eq!(path, ".bale");
}
}
// --- proptest strategies ---
// One component of 1 to 20 characters: an ASCII alphanumeric followed by
// alphanumerics, `_`, `.`, or `-`.
fn valid_component() -> impl Strategy<Value = String> {
"[a-zA-Z0-9][a-zA-Z0-9_.-]{0,19}"
}
// One to five valid components joined with `/`.
fn valid_path() -> impl Strategy<Value = String> {
prop::collection::vec(valid_component(), 1..=5).prop_map(|components| components.join("/"))
}
// One component of 2 to 20 characters that may contain interior spaces.
// The regex already forbids a space in the first and last position, so the
// filter only acts as a safeguard.
fn component_with_spaces() -> impl Strategy<Value = String> {
"[a-zA-Z0-9][a-zA-Z0-9 _.-]{0,18}[a-zA-Z0-9_.-]"
.prop_filter("must not have leading/trailing spaces", |s| {
!s.starts_with(' ') && !s.ends_with(' ')
})
}
// One to five space-containing components joined with `/`.
fn path_with_spaces() -> impl Strategy<Value = String> {
prop::collection::vec(component_with_spaces(), 1..=5)
.prop_map(|components| components.join("/"))
}
// --- Property tests ---
proptest! {
#![proptest_config(proptest_config::config())]
#[test]
fn valid_paths_accepted(path in valid_path()) {
let result = ArchivePath::try_from(path.as_str());
prop_assert!(result.is_ok(), "valid path rejected: {}", path);
let archive_path = result.unwrap();
prop_assert!(!archive_path.is_empty());
prop_assert!(archive_path.as_str().is_some());
}
#[test]
fn no_consecutive_slashes(path in valid_path()) {
let archive_path = ArchivePath::try_from(path.as_str()).unwrap();
let s = archive_path.as_str().unwrap();
prop_assert!(!s.contains("//"), "consecutive slashes in: {}", s);
}
#[test]
fn no_leading_trailing_slashes(path in valid_path()) {
let archive_path = ArchivePath::try_from(path.as_str()).unwrap();
let s = archive_path.as_str().unwrap();
prop_assert!(!s.starts_with('/'), "leading slash in: {}", s);
prop_assert!(!s.ends_with('/'), "trailing slash in: {}", s);
}
// Leading slashes are dropped rather than rejected.
#[test]
fn leading_slashes_stripped(
slashes in "/+",
path in valid_path(),
) {
let input = format!("{}{}", slashes, path);
let result = ArchivePath::try_from(input.as_str());
prop_assert!(result.is_ok());
let s = result.unwrap().as_str().unwrap().to_string();
prop_assert!(!s.starts_with('/'));
}
// Trailing slashes are dropped rather than rejected.
#[test]
fn trailing_slashes_stripped(
path in valid_path(),
slashes in "/+",
) {
let input = format!("{}{}", path, slashes);
let result = ArchivePath::try_from(input.as_str());
prop_assert!(result.is_ok());
let s = result.unwrap().as_str().unwrap().to_string();
prop_assert!(!s.ends_with('/'));
}
// One to four leading `..` components, optionally followed by a valid
// path, must always be rejected.
#[test]
fn path_traversal_rejected(
num_dotdots in 1usize..=4,
suffix in prop::option::of(valid_path()),
) {
let dotdots = vec![".."; num_dotdots].join("/");
let input = match suffix {
Some(s) => format!("{}/{}", dotdots, s),
None => dotdots,
};
let result = ArchivePath::try_from(input.as_str());
prop_assert!(result.is_err(), "path traversal accepted: {}", input);
}
// Inputs that leave no component after normalization.
#[test]
fn empty_paths_rejected(
input in prop_oneof![
Just("".to_string()),
Just(".".to_string()),
Just("/".to_string()),
Just(" ".to_string()),
Just("///".to_string()),
Just("./././".to_string()),
Just("./.".to_string()),
],
) {
let result = ArchivePath::try_from(input.as_str());
prop_assert!(result.is_err(), "empty path accepted: {:?}", input);
}
// Appends more `/..` segments than there are components, so resolution
// must climb above the root.
#[test]
fn excess_dotdot_rejected(
components in prop::collection::vec(valid_component(), 1..=3),
extra_dotdots in 1usize..=3,
) {
let mut path = components.join("/");
for _ in 0..components.len() + extra_dotdots {
path.push_str("/..");
}
let result = ArchivePath::try_from(path.as_str());
prop_assert!(result.is_err(), "excess dotdot accepted: {}", path);
}
// Every `TryFrom` source type accepts the same valid path.
#[test]
fn tryfrom_all_types(path in valid_path()) {
prop_assert!(ArchivePath::try_from(path.as_str()).is_ok());
prop_assert!(ArchivePath::try_from(path.clone()).is_ok());
prop_assert!(ArchivePath::try_from(Path::new(&path)).is_ok());
prop_assert!(ArchivePath::try_from(PathBuf::from(&path)).is_ok());
prop_assert!(ArchivePath::try_from(OsStr::new(&path)).is_ok());
prop_assert!(ArchivePath::try_from(OsString::from(&path)).is_ok());
}
#[test]
fn interior_spaces_accepted(path in path_with_spaces()) {
let result = ArchivePath::try_from(path.as_str());
prop_assert!(result.is_ok(), "path with interior spaces rejected: {:?}", path);
let archive_path = result.unwrap();
prop_assert!(!archive_path.is_empty());
prop_assert!(archive_path.as_str().is_some());
}
}
}