mod error;
mod path;
use core::marker::PhantomData;
#[cfg(feature = "alloc")]
use alloc::string::String;
use crate::buffer::{Buffer, ByteSliceBuf};
use crate::components::RiReferenceComponents;
use crate::parser::char;
use crate::spec::Spec;
use crate::task::{Error as TaskError, ProcessAndWrite};
use crate::types::{RiAbsoluteStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiString};
pub use self::error::Error;
pub(crate) use self::path::{Path, PathToNormalize};
/// Selects how much processing is applied when a task writes out an IRI.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NormalizationType {
/// Full normalization: when the task is written, the scheme is
/// ASCII-lowercased, the authority gets case and percent-encoding
/// normalization, and the query and fragment get percent-encoding
/// normalization. The path is normalized with this mode as well.
Full,
/// Scheme, authority, query and fragment are copied verbatim; only the path
/// is handed to the path normalizer (presumably for dot-segment removal
/// only -- the path normalizer is defined in the `path` module).
RemoveDotSegments,
}
/// A pending IRI normalization whose output is produced on demand.
///
/// `T` is a marker for the string type the task yields. In this file
/// `ProcessAndWrite` is implemented for tasks whose `T` is `RiStr<S>` or
/// `RiAbsoluteStr<S>`.
#[derive(Debug, Clone, Copy)]
pub struct NormalizationTask<'a, T: ?Sized> {
/// Borrowed IRI components plus the operation to perform; independent of `T`.
common: NormalizationTaskCommon<'a>,
/// Zero-sized marker that ties the task to `T` without storing a `T`.
_spec: PhantomData<fn() -> T>,
}
/// Creates a full-normalization task from a borrowed IRI.
impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationTask<'a, RiStr<S>> {
    /// Splits `iri` into its major components and wraps them in a task whose
    /// path still needs processing and whose operation is
    /// `NormalizationType::Full`.
    ///
    /// # Panics
    ///
    /// Panics if the decomposed IRI has no scheme.
    fn from(iri: &'a RiStr<S>) -> Self {
        let (scheme, authority, raw_path, query, fragment) =
            RiReferenceComponents::<S>::from(iri.as_ref()).to_major();
        Self::from(NormalizationTaskCommon {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(raw_path)),
            query,
            fragment,
            op: NormalizationType::Full,
        })
    }
}
/// Creates a normalization task from a borrowed owned-IRI string by
/// delegating to the conversion from its string slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationTask<'a, RiStr<S>> {
    #[inline]
    fn from(iri: &'a RiString<S>) -> Self {
        let slice = iri.as_slice();
        Self::from(slice)
    }
}
/// Creates a full-normalization task from a borrowed absolute IRI.
impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationTask<'a, RiAbsoluteStr<S>> {
    /// Splits `iri` into its major components and wraps them in a task whose
    /// path still needs processing and whose operation is
    /// `NormalizationType::Full`.
    ///
    /// # Panics
    ///
    /// Panics if the decomposed IRI has no scheme.
    fn from(iri: &'a RiAbsoluteStr<S>) -> Self {
        let (scheme, authority, raw_path, query, fragment) =
            RiReferenceComponents::<S>::from(iri.as_ref()).to_major();
        Self::from(NormalizationTaskCommon {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(raw_path)),
            query,
            fragment,
            op: NormalizationType::Full,
        })
    }
}
/// Creates a normalization task from a borrowed owned absolute-IRI string by
/// delegating to the conversion from its string slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationTask<'a, RiAbsoluteStr<S>> {
    #[inline]
    fn from(iri: &'a RiAbsoluteString<S>) -> Self {
        let slice = iri.as_slice();
        Self::from(slice)
    }
}
impl<'a, T: ?Sized + AsRef<str>> NormalizationTask<'a, T> {
    /// Switches the task to full normalization.
    ///
    /// Whatever operation was selected before, the task performs
    /// `NormalizationType::Full` afterwards.
    pub(crate) fn enable_normalization(&mut self) {
        debug_assert!(
            matches!(
                self.common.op,
                NormalizationType::Full | NormalizationType::RemoveDotSegments
            ),
            "No cases should be overlooked"
        );
        self.common.op = NormalizationType::Full;
    }

    /// Writes the processed IRI into `buf` and returns the bytes produced by
    /// this call.
    ///
    /// This is a thin wrapper around the spec-independent implementation on
    /// `NormalizationTaskCommon`.
    fn write_to_buf<'b, B: Buffer<'b>>(&self, buf: B) -> Result<&'b [u8], TaskError<Error>>
    where
        TaskError<Error>: From<B::ExtendError>,
    {
        self.common.write_to_buf(buf).map_err(Into::into)
    }

    /// Returns an estimated upper bound, in bytes, of the output size.
    ///
    /// The estimate is the sum of:
    ///
    /// * the scheme and the `":"` that always follows it,
    /// * `"//"` and the authority, if present,
    /// * `"?"` and the query, if present,
    /// * `"#"` and the fragment, if present,
    /// * the path's own maximum-size estimate.
    #[must_use]
    pub fn estimate_max_buf_size_for_resolution(&self) -> usize {
        let common = &self.common;
        // The writer unconditionally emits ":" after the scheme, so that byte
        // must be part of the bound; omitting it can make the estimate one
        // byte too small when normalization does not shorten anything.
        let scheme_len = common.scheme.len() + 1;
        let authority_len = common.authority.map_or(0, |s| s.len() + 2);
        let query_len = common.query.map_or(0, |s| s.len() + 1);
        let fragment_len = common.fragment.map_or(0, |s| s.len() + 1);
        let path_max = common.path.estimate_max_buf_size_for_resolution();

        scheme_len + authority_len + query_len + fragment_len + path_max
    }
}
impl<'a, T: ?Sized> From<NormalizationTaskCommon<'a>> for NormalizationTask<'a, T> {
#[inline]
fn from(common: NormalizationTaskCommon<'a>) -> Self {
Self {
common,
_spec: PhantomData,
}
}
}
/// Runs the task and yields the result as an `RiStr` / `RiString`.
impl<S: Spec> ProcessAndWrite for &NormalizationTask<'_, RiStr<S>> {
    type OutputBorrowed = RiStr<S>;
    #[cfg(feature = "alloc")]
    type OutputOwned = RiString<S>;
    type ProcessError = Error;

    /// Writes the result into a freshly created `String` and converts it into
    /// the owned IRI type.
    ///
    /// # Panics
    ///
    /// Panics if the written string is not accepted by `RiString::try_from`.
    #[cfg(feature = "alloc")]
    fn allocate_and_write(self) -> Result<Self::OutputOwned, TaskError<Self::ProcessError>> {
        let mut out = String::new();
        self.write_to_buf(&mut out)?;
        let iri = RiString::try_from(out).expect("[consistency] the resolved IRI must be valid");
        Ok(iri)
    }

    /// Writes the result into the caller-provided byte slice and returns the
    /// written part as a borrowed IRI.
    ///
    /// # Panics
    ///
    /// Panics if the written bytes are not accepted as an `RiStr`.
    fn write_to_byte_slice(
        self,
        buf: &mut [u8],
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf(ByteSliceBuf::new(buf))?;
        Ok(<&RiStr<S>>::try_from(written).expect("[consistency] the resolved IRI must be valid"))
    }

    /// Appends the result to `buf`, returning the appended part.
    ///
    /// # Panics
    ///
    /// Panics with `buffer error: ...` when the underlying write reports a
    /// buffer error; processing errors are returned to the caller.
    #[cfg(feature = "alloc")]
    fn append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, Self::ProcessError> {
        self.try_append_to_std_string(buf).map_err(|err| match err {
            TaskError::Process(e) => e,
            TaskError::Buffer(e) => panic!("buffer error: {}", e),
        })
    }

    /// Appends the result to `buf`, returning the appended part, and reports
    /// both buffer and processing errors to the caller.
    ///
    /// # Panics
    ///
    /// Panics if the appended bytes are not accepted as an `RiStr`.
    #[cfg(feature = "alloc")]
    fn try_append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let appended = self.write_to_buf(buf)?;
        Ok(<&RiStr<S>>::try_from(appended).expect("[consistency] the resolved IRI must be valid"))
    }
}
/// Runs the task and yields the result as an `RiAbsoluteStr` /
/// `RiAbsoluteString`.
impl<S: Spec> ProcessAndWrite for &NormalizationTask<'_, RiAbsoluteStr<S>> {
    type OutputBorrowed = RiAbsoluteStr<S>;
    #[cfg(feature = "alloc")]
    type OutputOwned = RiAbsoluteString<S>;
    type ProcessError = Error;

    /// Writes the result into a freshly created `String` and converts it into
    /// the owned absolute-IRI type.
    ///
    /// # Panics
    ///
    /// Panics if the written string is not accepted by
    /// `RiAbsoluteString::try_from`.
    #[cfg(feature = "alloc")]
    fn allocate_and_write(self) -> Result<Self::OutputOwned, TaskError<Self::ProcessError>> {
        let mut out = String::new();
        self.write_to_buf(&mut out)?;
        let iri =
            RiAbsoluteString::try_from(out).expect("[consistency] the resolved IRI must be valid");
        Ok(iri)
    }

    /// Writes the result into the caller-provided byte slice and returns the
    /// written part as a borrowed absolute IRI.
    ///
    /// # Panics
    ///
    /// Panics if the written bytes are not accepted as an `RiAbsoluteStr`.
    fn write_to_byte_slice(
        self,
        buf: &mut [u8],
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf(ByteSliceBuf::new(buf))?;
        Ok(<&RiAbsoluteStr<S>>::try_from(written)
            .expect("[consistency] the resolved IRI must be valid"))
    }

    /// Appends the result to `buf`, returning the appended part.
    ///
    /// # Panics
    ///
    /// Panics with `buffer error: ...` when the underlying write reports a
    /// buffer error; processing errors are returned to the caller.
    #[cfg(feature = "alloc")]
    fn append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, Self::ProcessError> {
        self.try_append_to_std_string(buf).map_err(|err| match err {
            TaskError::Process(e) => e,
            TaskError::Buffer(e) => panic!("buffer error: {}", e),
        })
    }

    /// Appends the result to `buf`, returning the appended part, and reports
    /// both buffer and processing errors to the caller.
    ///
    /// # Panics
    ///
    /// Panics if the appended bytes are not accepted as an `RiAbsoluteStr`.
    #[cfg(feature = "alloc")]
    fn try_append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let appended = self.write_to_buf(buf)?;
        Ok(<&RiAbsoluteStr<S>>::try_from(appended)
            .expect("[consistency] the resolved IRI must be valid"))
    }
}
/// Spec-independent data of a normalization task: the borrowed IRI
/// components and the operation to apply when writing them out.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizationTaskCommon<'a> {
/// Scheme, without the trailing `":"` (the writer appends the colon itself).
pub(crate) scheme: &'a str,
/// Authority, without the leading `"//"` (the writer emits it), if any.
pub(crate) authority: Option<&'a str>,
/// Path: either already final (`Path::Done`, copied verbatim) or still to be
/// normalized while writing (`Path::NeedsProcessing`).
pub(crate) path: Path<'a>,
/// Query, without the leading `"?"` (the writer emits it), if any.
pub(crate) query: Option<&'a str>,
/// Fragment, without the leading `"#"` (the writer emits it), if any.
pub(crate) fragment: Option<&'a str>,
/// Operation to perform when the task is written.
pub(crate) op: NormalizationType,
}
impl<'a> NormalizationTaskCommon<'a> {
    /// Appends the processed IRI to `buf` and returns the bytes appended by
    /// this call (anything already in the buffer is excluded).
    ///
    /// Components are written in the order scheme, `":"`, `"//"` + authority,
    /// path, `"?"` + query, `"#"` + fragment. With `NormalizationType::Full`
    /// the non-path components are normalized on the fly; otherwise they are
    /// copied verbatim.
    ///
    /// # Errors
    ///
    /// Returns a processing error when there is no authority and the written
    /// path starts with `"//"`. Buffer and path-normalization failures are
    /// propagated.
    fn write_to_buf<'b, B: Buffer<'b>>(&self, mut buf: B) -> Result<&'b [u8], TaskError<Error>>
    where
        TaskError<Error>: From<B::ExtendError>,
    {
        let full = match self.op {
            NormalizationType::Full => true,
            NormalizationType::RemoveDotSegments => false,
        };
        // Remember where our output begins so only the new bytes are returned.
        let start = buf.as_bytes().len();

        // Scheme and its delimiter.
        if full {
            buf.extend_chars(self.scheme.chars().map(|c| c.to_ascii_lowercase()))?;
        } else {
            buf.push_str(self.scheme)?;
        }
        buf.push_str(":")?;

        // Authority, when present.
        if let Some(authority) = self.authority {
            buf.push_str("//")?;
            if full {
                buf.extend_chars(normalize_case_and_pct_encodings(authority))?;
            } else {
                buf.push_str(authority)?;
            }
        }

        // Path.
        let path_start = buf.as_bytes().len();
        match self.path {
            Path::Done(finished) => {
                buf.push_str(finished)?;
            }
            Path::NeedsProcessing(pending) => {
                pending.normalize(&mut buf, self.op)?;
            }
        }

        // Without an authority, a path beginning with "//" would be read back
        // as an authority, so such a result is rejected.
        let path_mimics_authority =
            self.authority.is_none() && buf.as_bytes()[path_start..].starts_with(b"//");
        if path_mimics_authority {
            return Err(TaskError::Process(Error::new()));
        }

        // Query first, then fragment; both are handled the same way.
        for &(delimiter, component) in [("?", self.query), ("#", self.fragment)].iter() {
            if let Some(component) = component {
                buf.push_str(delimiter)?;
                if full {
                    buf.extend_chars(normalize_pct_encodings(component))?;
                } else {
                    buf.push_str(component)?;
                }
            }
        }

        Ok(&buf.into_bytes()[start..])
    }
}
/// Character iterator that normalizes percent-encodings (and optionally
/// ASCII case) of its input while it is being read.
#[derive(Debug, Clone)]
struct NormalizeCaseAndPercentEncodings<'a> {
/// Part of the input that has not been consumed yet.
rest: &'a str,
/// Number of upcoming characters that are the hex digits of a
/// percent-encoded triplet kept as-is; they are emitted in uppercase.
rest_pct_encoded: u8,
/// Whether ASCII uppercase characters (literal, or obtained by decoding a
/// percent-encoded triplet) are converted to lowercase.
normalize_case: bool,
}
impl NormalizeCaseAndPercentEncodings<'_> {
    /// Removes the first character from the unread input and returns it, or
    /// returns `None` (leaving the state untouched) when no input is left.
    fn consume_char(&mut self) -> Option<char> {
        let unread = self.rest;
        let mut chars = unread.chars();
        let head = chars.next()?;
        // Whatever the character iterator has not yielded is the new remainder.
        self.rest = chars.as_str();
        Some(head)
    }
}
impl Iterator for NormalizeCaseAndPercentEncodings<'_> {
    type Item = char;

    /// Yields the next normalized character.
    ///
    /// * Hex digits of a percent-encoded triplet that is kept are uppercased.
    /// * A triplet that decodes to an ASCII unreserved character is replaced
    ///   by that character (lowercased first when case normalization is on).
    /// * Any other character is passed through, lowercased when case
    ///   normalization is on.
    ///
    /// # Panics
    ///
    /// Panics if a `%` is not followed by two hexadecimal digits.
    fn next(&mut self) -> Option<Self::Item> {
        /// Converts one ASCII hexadecimal digit to its numeric value.
        fn hex_value(digit: u8) -> u8 {
            match digit {
                b'0'..=b'9' => digit - b'0',
                b'a'..=b'f' => digit - b'a' + 10,
                b'A'..=b'F' => digit - b'A' + 10,
                _ => {
                    unreachable!("valid IRIs must not have incomplete or invalid percent encodings")
                }
            }
        }

        let current = self.consume_char()?;

        // Still inside a triplet that stays encoded: emit its hex digit in
        // uppercase.
        if self.rest_pct_encoded > 0 {
            self.rest_pct_encoded -= 1;
            return Some(current.to_ascii_uppercase());
        }

        // Ordinary character.
        if current != '%' {
            let out = if self.normalize_case {
                current.to_ascii_lowercase()
            } else {
                current
            };
            return Some(out);
        }

        // `current` is '%': peek at the two hex digits that follow it.
        let digits = self.rest.as_bytes();
        let code = (hex_value(digits[0]) << 4) | hex_value(digits[1]);
        let decoded = if self.normalize_case {
            code.to_ascii_lowercase()
        } else {
            code
        };

        if decoded.is_ascii() && char::is_ascii_unreserved(decoded) {
            // The triplet is replaced by the character itself, so skip both
            // hex digits.
            self.consume_char();
            self.consume_char();
            return Some(decoded as char);
        }

        // Keep the triplet; its two hex digits are emitted by the next calls.
        self.rest_pct_encoded = 2;
        Some(current)
    }
}
/// Returns an iterator over the characters of `i` with both ASCII case and
/// percent-encodings normalized.
fn normalize_case_and_pct_encodings(i: &str) -> NormalizeCaseAndPercentEncodings<'_> {
    let normalize_case = true;
    NormalizeCaseAndPercentEncodings {
        rest: i,
        rest_pct_encoded: 0,
        normalize_case,
    }
}
/// Returns an iterator over the characters of `i` with percent-encodings
/// normalized and the case of other characters left as-is.
fn normalize_pct_encodings(i: &str) -> NormalizeCaseAndPercentEncodings<'_> {
    let normalize_case = false;
    NormalizeCaseAndPercentEncodings {
        rest: i,
        rest_pct_encoded: 0,
        normalize_case,
    }
}
#[cfg(test)]
mod tests {
    #[cfg(feature = "alloc")]
    use crate::types::IriStr;

    /// Test table. Each entry is
    /// `(normalized IRI, IRIs that normalize to it, IRIs that must not)`.
    #[cfg(feature = "alloc")]
    const CASES: &[(&str, &[&str], &[&str])] = &[
        (
            "https://example.com/pa/th?query#frag",
            &["https://example.com/pa/th?query#frag"],
            &[],
        ),
        (
            "https://example.com/pA/Th?Query#Frag",
            &["HTTPs://EXaMPLE.COM/pA/Th?Query#Frag"],
            &[
                "https://example.com/pa/th?Query#Frag",
                "https://example.com/pA/Th?query#Frag",
                "https://example.com/pA/Th?Query#frag",
            ],
        ),
        (
            "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
            &[
                "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
                "URN:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
            ],
            &[
                "urn:UUID:7f1450df-6678-465b-a881-188f9b6ec822",
                "urn:uuid:7F1450DF-6678-465B-A881-188F9B6EC822",
            ],
        ),
        (
            "http://example.com/a/b/d/e",
            &[
                "http://example.com/a/b/c/%2e%2e/d/e",
                "http://example.com/a/b/c/%2E%2E/d/e",
                "http://example.com/a/b/c/../d/e",
                "http://example.com/a/b/c/%2E%2e/d/e",
                "http://example.com/a/b/c/.%2e/d/e",
                "http://example.com/a/./././././b/c/.%2e/d/e",
            ],
            &[],
        ),
        (
            "http://example.com/~Ascii%21",
            &["http://example.com/%7E%41%73%63%69%69%21"],
            &[],
        ),
    ];

    /// Checks every table entry: the expected IRI is a fixed point of
    /// normalization, each source normalizes to it, and each "different" IRI
    /// normalizes to something else.
    #[test]
    #[cfg(feature = "alloc")]
    fn normalize() {
        for &(expected, sources, different_iris) in CASES {
            let expected = IriStr::new(expected).expect("must be a valid IRI");

            let renormalized = expected
                .normalize()
                .expect("normalized IRI must be normalizable");
            assert_eq!(
                renormalized, expected,
                "IRI normalization must be idempotent"
            );

            for &src in sources {
                let src = IriStr::new(src).expect("must be a valid IRI");
                let normalized = src.normalize().expect("should be normalizable");
                assert_eq!(normalized, expected);
            }

            for &different in different_iris {
                let different = IriStr::new(different).expect("must be a valid IRI");
                let normalized = different.normalize().expect("should be normalizable");
                assert_ne!(
                    normalized, expected,
                    "{:?} should not be normalized to {:?}",
                    different, expected
                );
            }
        }
    }
}