use std::{
ffi::OsStr,
fmt::Display,
ops::Deref,
path::{Path, PathBuf},
rc::Rc,
str::FromStr,
sync::Arc,
};
/// Conversion of a path-like value into [`PathSegments`].
pub trait ToPathSegments {
/// Builds the [`PathSegments`] for `self`, or returns the
/// [`PathSegmentError`] explaining why the conversion failed.
fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError>;
}
/// Parses a `/`-separated string into [`PathSegments`].
///
/// Empty pieces produced by leading, trailing, or repeated `/` are skipped,
/// so `"/"` yields no segments at all.
///
/// # Errors
///
/// - [`PathSegmentError::Empty`] when the string itself is empty.
/// - [`PathSegmentError::InvalidCharacter`] when the string contains `\` or
///   `:` (`\` is reported in preference to `:` when both are present).
/// - Whatever error parsing an individual piece as a segment produces.
impl ToPathSegments for str {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        if self.is_empty() {
            return Err(PathSegmentError::Empty);
        }

        // The whole input is scanned for each forbidden character in turn.
        let forbidden = ['\\', ':'];
        if let Some(found) = forbidden.iter().copied().find(|&c| self.contains(c)) {
            return Err(PathSegmentError::InvalidCharacter(found));
        }

        let mut parsed: Vec<PathSegment> = Vec::new();
        for piece in self.split('/').filter(|piece| !piece.is_empty()) {
            parsed.push(piece.parse()?);
        }
        Ok(PathSegments(parsed))
    }
}
/// Forwards to the `str` implementation on the string's contents.
impl ToPathSegments for String {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        <str as ToPathSegments>::to_path_segments(self.as_str())
    }
}
/// Treats every element of the slice as one already-split segment.
///
/// Each element is parsed as a segment on its own; the first element that
/// fails to parse aborts the conversion with that error.
impl ToPathSegments for [&str] {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        let mut parsed = Vec::with_capacity(self.len());
        for raw in self {
            parsed.push(raw.parse()?);
        }
        Ok(PathSegments(parsed))
    }
}
/// Copies the already-validated segments into a new [`PathSegments`]; this
/// conversion never fails.
impl ToPathSegments for [PathSegment] {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        let owned = self.to_owned();
        Ok(PathSegments(owned))
    }
}
/// Normalizes a rooted [`Path`] into [`PathSegments`].
///
/// Components are folded left to right: a prefix or root component discards
/// everything collected so far, `.` is skipped, `..` removes the most
/// recently collected segment (if there is one), and every other component
/// is parsed as a segment.
///
/// # Errors
///
/// - [`PathSegmentError::NotAbsolute`] when the path has no root.
/// - [`PathSegmentError::IllegalPathComponent`] (carrying a lossy rendering)
///   when a component isn't valid UTF-8.
/// - Whatever error parsing a component as a segment produces.
impl ToPathSegments for Path {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        use std::path::Component;

        if !self.has_root() {
            return Err(PathSegmentError::NotAbsolute);
        }

        let mut stack = Vec::new();
        for part in self.components() {
            match part {
                Component::Prefix(_) | Component::RootDir => stack.clear(),
                Component::CurDir => {}
                Component::ParentDir => {
                    // Popping an empty stack is a no-op, so `/..` stays at the root.
                    stack.pop();
                }
                Component::Normal(name) => {
                    let text = match name.to_str() {
                        Some(text) => text,
                        None => {
                            return Err(PathSegmentError::IllegalPathComponent(
                                name.to_string_lossy().into_owned(),
                            ));
                        }
                    };
                    stack.push(text.parse()?);
                }
            }
        }

        Ok(PathSegments(stack))
    }
}
impl ToPathSegments for PathBuf {
fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
self.as_path().to_path_segments()
}
}
/// Forwards to the `[PathSegment]` slice implementation.
impl ToPathSegments for Vec<PathSegment> {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        <[PathSegment] as ToPathSegments>::to_path_segments(&self[..])
    }
}
/// Forwards to the `[&str]` slice implementation, so fixed-size arrays of
/// string literals can be converted directly.
impl<const N: usize> ToPathSegments for [&str; N] {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        let items: &[&str] = self;
        items.to_path_segments()
    }
}
/// A shared reference converts exactly like the value it points at.
impl<T: ToPathSegments + ?Sized> ToPathSegments for &'_ T {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        T::to_path_segments(*self)
    }
}
/// A boxed value converts exactly like the value inside the box.
impl<T: ToPathSegments + ?Sized> ToPathSegments for Box<T> {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        T::to_path_segments(&**self)
    }
}
/// An `Arc`-wrapped value converts exactly like the value it shares.
impl<T: ToPathSegments + ?Sized> ToPathSegments for Arc<T> {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        T::to_path_segments(&**self)
    }
}
/// An `Rc`-wrapped value converts exactly like the value it shares.
impl<T: ToPathSegments + ?Sized> ToPathSegments for Rc<T> {
    fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
        T::to_path_segments(&**self)
    }
}
impl ToPathSegments for PathSegments {
fn to_path_segments(&self) -> Result<PathSegments, PathSegmentError> {
Ok(self.clone())
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PathSegments(pub(crate) Vec<PathSegment>);
impl PathSegments {
pub const ROOT: PathSegments = PathSegments(Vec::new());
pub fn push(&mut self, segment: PathSegment) {
self.0.push(segment);
}
pub fn pop(&mut self) -> Option<PathSegment> {
self.0.pop()
}
#[must_use]
pub fn join(&self, segment: PathSegment) -> PathSegments {
let mut path = self.clone();
path.push(segment);
path
}
pub fn iter(&self) -> impl Iterator<Item = &'_ PathSegment> + '_ {
IntoIterator::into_iter(self)
}
}
/// Renders the path with a leading `/` before every segment; a path with no
/// segments renders as a lone `/`.
impl Display for PathSegments {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.0.as_slice() {
            [] => write!(f, "/"),
            segments => segments
                .iter()
                .try_for_each(|segment| write!(f, "/{segment}")),
        }
    }
}
/// Compares a path against anything viewable as a slice of string slices:
/// equal when both have the same number of entries and every segment's text
/// matches the string at the same position.
impl<'a, A: AsRef<[&'a str]>> PartialEq<A> for PathSegments {
    fn eq(&self, other: &A) -> bool {
        let expected: &[&str] = other.as_ref();
        if self.0.len() != expected.len() {
            return false;
        }
        self.0
            .iter()
            .zip(expected.iter())
            .all(|(segment, text)| segment.as_str() == *text)
    }
}
/// Collects already-validated segments, in iteration order, into a path.
impl FromIterator<PathSegment> for PathSegments {
    fn from_iter<T: IntoIterator<Item = PathSegment>>(iter: T) -> Self {
        let mut segments = Vec::new();
        segments.extend(iter);
        PathSegments(segments)
    }
}
impl IntoIterator for PathSegments {
type Item = PathSegment;
type IntoIter = <Vec<PathSegment> as IntoIterator>::IntoIter;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
/// Parses a path from a string using the same rules as calling
/// `to_path_segments` on that string.
impl FromStr for PathSegments {
    type Err = PathSegmentError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let segments = s.to_path_segments()?;
        Ok(segments)
    }
}
impl<'a> IntoIterator for &'a PathSegments {
type Item = &'a PathSegment;
type IntoIter = std::slice::Iter<'a, PathSegment>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
#[derive(Debug, Clone, PartialEq, thiserror::Error)]
#[non_exhaustive]
pub enum PathSegmentError {
#[error("Path segments must be UTF-8 strings, found \"{}\"", segment.escape_ascii())]
InvalidUtf8 {
segment: Vec<u8>,
},
#[error("\"{_0:?}\" isn't a valid path segment")]
IllegalPathComponent(String),
#[error("Invalid character, \"{}\"", _0.escape_default())]
InvalidCharacter(char),
#[error("Path segments can't be empty")]
Empty,
#[error("The path isn't absolute")]
NotAbsolute,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PathSegment(Arc<str>);
impl PathSegment {
pub fn parse(s: &str) -> Result<Self, PathSegmentError> {
s.parse()
}
pub fn as_str(&self) -> &str {
&self.0
}
}
/// Displays the segment exactly as its underlying text would be displayed,
/// using the caller's formatter unchanged.
impl Display for PathSegment {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_str();
        <str as Display>::fmt(text, f)
    }
}
impl TryFrom<&str> for PathSegment {
type Error = PathSegmentError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
const ILLEGAL_SEGMENTS: &[&str] = &[".", ".."];
if s.is_empty() {
Err(PathSegmentError::Empty)
} else if ILLEGAL_SEGMENTS.contains(&s) {
Err(PathSegmentError::IllegalPathComponent(s.into()))
} else {
Ok(PathSegment(s.into()))
}
}
}
/// Validates an OS string as a single path segment.
///
/// Text that is valid UTF-8 goes through the `&str` conversion. Anything
/// else is rejected with [`PathSegmentError::IllegalPathComponent`], carrying
/// a lossy rendering of the input.
impl TryFrom<&OsStr> for PathSegment {
    type Error = PathSegmentError;

    fn try_from(value: &OsStr) -> Result<Self, Self::Error> {
        match value.to_str() {
            Some(text) => Self::try_from(text),
            None => Err(PathSegmentError::IllegalPathComponent(
                value.to_string_lossy().into_owned(),
            )),
        }
    }
}
impl FromStr for PathSegment {
type Err = PathSegmentError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
s.try_into()
}
}
/// Lets a segment be used wherever a `&str` is expected.
impl Deref for PathSegment {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &*self.0
    }
}
/// Cheap borrow of the segment's text.
impl AsRef<str> for PathSegment {
    fn as_ref(&self) -> &str {
        &**self
    }
}
/// A segment equals a string when their text is identical.
impl PartialEq<str> for PathSegment {
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}
impl PartialEq<&str> for PathSegment {
fn eq(&self, other: &&str) -> bool {
self == *other
}
}
/// Mirror image of `PathSegment == &str`, so the string can appear on the
/// left-hand side of a comparison.
impl PartialEq<PathSegment> for &str {
    fn eq(&self, other: &PathSegment) -> bool {
        other.as_str() == *self
    }
}
#[cfg(test)]
mod tests {
    use std::cmp::Ordering;

    use super::*;

    /// Reserved names and the empty string never convert.
    #[test]
    fn invalid_path_segment() {
        for rejected in ["..", "", "."] {
            assert!(rejected.to_path_segments().is_err());
        }
    }

    /// Slash-separated strings split into the expected segments, with empty
    /// pieces dropped.
    #[test]
    fn parse_path_segments_from_strings() {
        let cases: &[(&str, &[&str])] = &[
            ("/", &[]),
            ("root", &["root"]),
            ("/root", &["root"]),
            ("path/to", &["path", "to"]),
            ("path/to//file.txt", &["path", "to", "file.txt"]),
        ];

        for &(src, expected) in cases {
            let parsed = src.to_path_segments().unwrap();
            assert_eq!(parsed, expected);

            // Every segment the parser produced must itself re-parse.
            for segment in parsed.iter() {
                let _reparsed: PathSegment = segment.parse().unwrap();
            }
        }
    }

    #[test]
    fn the_empty_string_isnt_allowed() {
        assert_eq!(
            "".to_path_segments().unwrap_err(),
            PathSegmentError::Empty
        );
    }

    /// Ordering compares segment by segment, not the rendered string.
    #[test]
    fn order_lexiconographically_by_segment() {
        let cases = [
            ("/a", "/a/", Ordering::Equal),
            ("/a", "/b", Ordering::Less),
            ("/azzzz", "/a/a", Ordering::Greater),
            ("/a/zzzz", "/a/a", Ordering::Greater),
        ];

        for (lhs_src, rhs_src, expected) in cases {
            let left = lhs_src.to_path_segments().unwrap();
            let right = rhs_src.to_path_segments().unwrap();
            assert_eq!(left.cmp(&right), expected, "{left:?}, {right:?}");
        }
    }

    #[test]
    fn backslash_isnt_allowed() {
        let failure = r"\path\to\file".to_path_segments().unwrap_err();
        assert_eq!(failure, PathSegmentError::InvalidCharacter('\\'));
    }

    #[test]
    fn reject_drive_numbers() {
        let failure = r"C:".to_path_segments().unwrap_err();
        assert_eq!(failure, PathSegmentError::InvalidCharacter(':'));
    }

    /// `std::path::Path` values are normalized, and unrooted ones rejected.
    #[test]
    fn convert_std_path() {
        assert_eq!(
            Path::new("/").to_path_segments().unwrap(),
            PathSegments::ROOT,
        );

        let rooted: &[(&str, &[&str])] = &[
            ("/path/./to/./file.txt", &["path", "to", "file.txt"]),
            ("/path/../file.txt", &["file.txt"]),
            ("/path/to/file.txt", &["path", "to", "file.txt"]),
        ];
        for &(raw, expected) in rooted {
            assert_eq!(
                Path::new(raw).to_path_segments().unwrap(),
                expected.to_path_segments().unwrap(),
            );
        }

        for unrooted in [".", "..", ""] {
            assert_eq!(
                Path::new(unrooted).to_path_segments().unwrap_err(),
                PathSegmentError::NotAbsolute,
            );
        }
    }

    /// Windows prefixes (drive letters, UNC shares, device paths) are
    /// discarded, leaving only the segments below the root.
    #[test]
    #[cfg_attr(not(windows), ignore = "Only works with Path's logic on Windows")]
    fn convert_windows_paths() {
        let inputs: &[(&str, &[&str])] = &[
            (r"C:\path\to\file.txt", &["path", "to", "file.txt"]),
            (r"C:/path/to/file.txt", &["path", "to", "file.txt"]),
            (r"\\system07\C$\", &[]),
            (r"c:\temp\test-file.txt", &["temp", "test-file.txt"]),
            (
                r"\\127.0.0.1\c$\temp\test-file.txt",
                &["temp", "test-file.txt"],
            ),
            (r"\\.\c:\temp\test-file.txt", &["temp", "test-file.txt"]),
            (r"\\?\c:\temp\test-file.txt", &["temp", "test-file.txt"]),
            (
                r"\\.\Volume{b75e2c83-0000-0000-0000-602f00000000}\temp\test-file.txt",
                &["temp", "test-file.txt"],
            ),
        ];

        for &(path, expected) in inputs {
            let normalized = Path::new(path).to_path_segments().unwrap();
            assert_eq!(normalized, expected.to_path_segments().unwrap(), "{path:?}");
        }
    }
}