use percent_encoding::{percent_encode, AsciiSet, CONTROLS};
use std::borrow::Cow;
use crate::path::DELIMITER_BYTE;
use snafu::Snafu;
/// Error returned by [`PathPart::parse`] when a segment cannot be accepted as-is.
///
/// The rendered message names both the offending character sequence and the
/// complete segment it was found in.
#[derive(Debug, Snafu)]
#[snafu(display(
"Encountered illegal character sequence \"{}\" whilst parsing path segment \"{}\"",
illegal,
segment
))]
// NOTE(review): both fields are `String`, so this type can never be `Copy`;
// the allow below looks redundant - confirm before removing.
#[allow(missing_copy_implementations)]
pub struct InvalidPart {
/// The complete segment that was rejected.
segment: String,
/// The rejected character, or the whole segment when it is `.` or `..`.
illegal: String,
}
/// A single path segment held in its validated / percent-encoded form.
///
/// [`PathPart::parse`] stores a segment unchanged after checking that it holds
/// nothing that would need escaping, while the `From` conversions percent-encode
/// whatever may not appear verbatim.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)]
pub struct PathPart<'a> {
/// Segment text; may borrow from the input or own a freshly encoded string.
pub(super) raw: Cow<'a, str>,
}
impl<'a> PathPart<'a> {
pub fn parse(segment: &'a str) -> Result<Self, InvalidPart> {
if segment == "." || segment == ".." {
return Err(InvalidPart {
segment: segment.to_string(),
illegal: segment.to_string(),
});
}
for (idx, b) in segment.as_bytes().iter().cloned().enumerate() {
if b == b'%' {
continue;
}
if !b.is_ascii() || should_percent_encode(b) {
return Err(InvalidPart {
segment: segment.to_string(),
illegal: segment.chars().nth(idx).unwrap().to_string(),
});
}
}
Ok(Self {
raw: segment.into(),
})
}
}
/// Reports whether the byte `c` is rewritten when percent-encoded against
/// `INVALID`.
fn should_percent_encode(c: u8) -> bool {
    let single = [c];
    // The input is non-empty, so the encoder always produces a first chunk.
    let chunk = percent_encode(&single, INVALID).next().unwrap();
    // Anything other than a one-byte chunk means the byte was replaced.
    chunk.len() != 1
}
/// Bytes that are escaped rather than stored verbatim in a [`PathPart`]:
/// everything in `CONTROLS` plus the punctuation listed below.
const INVALID: &AsciiSet = &CONTROLS
    // Path delimiter and the escape introducer itself.
    .add(DELIMITER_BYTE)
    .add(b'%')
    // Line breaks, listed explicitly.
    .add(b'\r')
    .add(b'\n')
    // Brackets and angle brackets.
    .add(b'{')
    .add(b'}')
    .add(b'[')
    .add(b']')
    .add(b'<')
    .add(b'>')
    // Quoting characters.
    .add(b'"')
    .add(b'`')
    // Remaining punctuation.
    .add(b'\\')
    .add(b'^')
    .add(b'~')
    .add(b'#')
    .add(b'|')
    .add(b'*')
    .add(b'?');
impl<'a> From<&'a [u8]> for PathPart<'a> {
fn from(v: &'a [u8]) -> Self {
let inner = match v {
b"." => "%2E".into(),
b".." => "%2E%2E".into(),
other => percent_encode(other, INVALID).into(),
};
Self { raw: inner }
}
}
/// Builds a part from a string slice by encoding its bytes; see the
/// `From<&[u8]>` conversion, to which this delegates.
impl<'a> From<&'a str> for PathPart<'a> {
    fn from(v: &'a str) -> Self {
        let bytes: &'a [u8] = v.as_bytes();
        bytes.into()
    }
}
/// Builds an owned part from a `String`, encoding it the same way as the
/// `From<&str>` conversion and detaching the result from the input.
impl From<String> for PathPart<'static> {
    fn from(s: String) -> Self {
        // Encode while borrowing `s`, then take ownership of the outcome so
        // the returned part no longer refers to it.
        let encoded = PathPart::from(s.as_str()).raw;
        Self {
            raw: Cow::Owned(encoded.into_owned()),
        }
    }
}
/// Exposes the stored segment text as a string slice.
impl AsRef<str> for PathPart<'_> {
    fn as_ref(&self) -> &str {
        &self.raw
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `input` through the `From<&str>` conversion and returns the stored text.
    fn encode(input: &str) -> String {
        PathPart::from(input).raw.into_owned()
    }

    #[test]
    fn path_part_delimiter_gets_encoded() {
        assert_eq!(encode("foo/bar"), "foo%2Fbar");
    }

    #[test]
    fn path_part_given_already_encoded_string() {
        // The conversion does not recognise existing escapes: `%` is re-encoded.
        assert_eq!(encode("foo%2Fbar"), "foo%252Fbar");
    }

    #[test]
    fn path_part_cant_be_one_dot() {
        assert_eq!(encode("."), "%2E");
    }

    #[test]
    fn path_part_cant_be_two_dots() {
        assert_eq!(encode(".."), "%2E%2E");
    }

    #[test]
    fn path_part_parse() {
        // Segments that must be accepted unchanged, including stray `%`.
        for valid in ["foo", "foo%2Fbar", "L%3ABC.parquet", "%Z", "%%"] {
            PathPart::parse(valid).unwrap();
        }

        // A raw delimiter is never allowed inside a single segment.
        PathPart::parse("foo/bar").unwrap_err();
    }
}