use percent_encoding::{AsciiSet, CONTROLS, percent_encode};
use std::{
borrow::Cow,
iter::{self, FusedIterator},
str::SplitTerminator,
};
use crate::path::DELIMITER_BYTE;
/// Error returned by [`PathPart::parse`] when a segment is rejected.
///
/// Its display text quotes both the offending character sequence and the full segment.
#[derive(Debug, thiserror::Error)]
#[error(
"Encountered illegal character sequence \"{}\" whilst parsing path segment \"{}\"",
illegal,
segment
)]
// NOTE(review): both fields are `String`, so this type cannot be `Copy` in the first
// place; this allow looks like a leftover — confirm before removing it.
#[allow(missing_copy_implementations)]
pub struct InvalidPart {
/// The complete segment that was being parsed.
segment: String,
/// The rejected text: the whole segment for `.` / `..`, otherwise the single
/// offending character.
illegal: String,
}
/// A single segment of a path, held as a borrowed or owned string.
///
/// [`PathPart::parse`] stores the segment unchanged after validating it, while the
/// `From` conversions in this file store a percent-encoded form of their input.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct PathPart<'a> {
/// The segment text exactly as stored by whichever constructor produced this value.
pub(super) raw: Cow<'a, str>,
}
impl<'a> PathPart<'a> {
pub fn parse(segment: &'a str) -> Result<Self, InvalidPart> {
if segment == "." || segment == ".." {
return Err(InvalidPart {
segment: segment.to_string(),
illegal: segment.to_string(),
});
}
for c in segment.chars() {
if c.is_ascii_control() || c == '/' {
return Err(InvalidPart {
segment: segment.to_string(),
illegal: c.to_string(),
});
}
}
Ok(Self {
raw: segment.into(),
})
}
}
/// Bytes that the `From<&[u8]>` conversion percent-encodes, in addition to the ASCII
/// control bytes already contained in `CONTROLS`.
const INVALID: &AsciiSet = &CONTROLS
    // The delimiter byte imported from `crate::path`.
    .add(DELIMITER_BYTE)
    // Punctuation, listed in ASCII order so membership is easy to check by eye.
    .add(b'"')
    .add(b'#')
    .add(b'%')
    .add(b'*')
    .add(b'<')
    .add(b'>')
    .add(b'?')
    .add(b'[')
    .add(b'\\')
    .add(b']')
    .add(b'^')
    .add(b'`')
    .add(b'{')
    .add(b'|')
    .add(b'}')
    .add(b'~')
    // `\r` and `\n` are C0 control bytes, which `CONTROLS` is documented to cover
    // already; they are still listed explicitly.
    .add(b'\r')
    .add(b'\n');
impl<'a> From<&'a [u8]> for PathPart<'a> {
fn from(v: &'a [u8]) -> Self {
let inner = match v {
b"." => "%2E".into(),
b".." => "%2E%2E".into(),
other => percent_encode(other, INVALID).into(),
};
Self { raw: inner }
}
}
impl<'a> From<&'a str> for PathPart<'a> {
    /// Converts a string by delegating to the byte-slice conversion on its UTF-8 bytes.
    fn from(v: &'a str) -> Self {
        let bytes: &'a [u8] = v.as_bytes();
        bytes.into()
    }
}
impl From<String> for PathPart<'static> {
    /// Encodes `s` exactly like the `&str` conversion, returning a part that owns its text.
    fn from(s: String) -> Self {
        let encoded = PathPart::from(s.as_str()).raw;
        // `encoded` may still borrow from `s`; copy it out so the result is `'static`.
        let owned: String = encoded.into_owned();
        Self {
            raw: Cow::Owned(owned),
        }
    }
}
impl AsRef<str> for PathPart<'_> {
    /// Returns the stored segment text.
    fn as_ref(&self) -> &str {
        // `&Cow<str>` deref-coerces to `&str` at the return site.
        &self.raw
    }
}
/// Iterator over the [`PathPart`]s of a string, as built by `PathParts::new`.
///
/// Wraps a `str::split_terminator` iterator mapped through a plain function pointer,
/// which allows the complete iterator type to be written out in this field.
#[derive(Debug, Clone)]
pub struct PathParts<'a>(iter::Map<SplitTerminator<'a, char>, fn(&str) -> PathPart<'_>>);
impl<'a> PathParts<'a> {
pub(super) fn new(raw: &'a str) -> Self {
Self(
raw.split_terminator(super::DELIMITER_CHAR)
.map(|s| PathPart { raw: s.into() }),
)
}
}
impl<'a> Iterator for PathParts<'a> {
type Item = PathPart<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}
/// Marks [`PathParts`] as fused; the impl adds no methods of its own.
impl FusedIterator for PathParts<'_> {}
impl<'a> DoubleEndedIterator for PathParts<'a> {
    /// Yields the next segment from the back by forwarding to the wrapped iterator.
    fn next_back(&mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self.0)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting from a string percent-encodes the path delimiter.
    #[test]
    fn path_part_delimiter_gets_encoded() {
        assert_eq!(PathPart::from("foo/bar").raw, "foo%2Fbar");
    }

    /// Conversion does not recognise existing escapes: the `%` itself is encoded again.
    #[test]
    fn path_part_given_already_encoded_string() {
        assert_eq!(PathPart::from("foo%2Fbar").raw, "foo%252Fbar");
    }

    /// A lone `.` is stored in encoded form.
    #[test]
    fn path_part_cant_be_one_dot() {
        assert_eq!(PathPart::from(".").raw, "%2E");
    }

    /// A lone `..` is stored in encoded form.
    #[test]
    fn path_part_cant_be_two_dots() {
        assert_eq!(PathPart::from("..").raw, "%2E%2E");
    }

    /// `parse` rejects the delimiter but accepts `%`, whether or not it starts a
    /// well-formed escape sequence.
    #[test]
    fn path_part_parse() {
        let accepted = ["foo", "foo%2Fbar", "L%3ABC.parquet", "%Z", "%%"];
        for segment in accepted {
            PathPart::parse(segment).unwrap();
        }

        PathPart::parse("foo/bar").unwrap_err();
    }
}