mod error;
mod path;
mod percent_encoding;
use core::marker::PhantomData;
#[cfg(feature = "alloc")]
use alloc::string::String;
use crate::buffer::{Buffer, ByteSliceBuf};
use crate::components::RiReferenceComponents;
use crate::parser::str::rfind_split_hole;
use crate::parser::trusted::is_ascii_only_host;
use crate::spec::Spec;
use crate::task::{Error as TaskError, ProcessAndWrite};
use crate::types::{RiAbsoluteStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiString};
pub use self::error::Error;
pub(crate) use self::path::{Path, PathToNormalize};
/// Set of switches that select which normalization steps are applied when a
/// task writes its result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct NormalizationOp {
/// Whether case normalization and percent-encoding normalization are applied.
///
/// When `false`, the scheme, authority, query and fragment are written as is.
pub(crate) case_pct_normalization: bool,
/// Whether WHATWG-style serialization is enabled.
///
/// When `true`, the writer does not report an error for a result that has no
/// authority but whose path starts with `//`; the flag is also forwarded to
/// the path normalizer.
pub(crate) whatwg_serialization: bool,
}
/// IRI normalization/resolution task.
///
/// The type parameter `T` is the borrowed string type the task produces
/// (`RiStr<S>` or `RiAbsoluteStr<S>` in this module); it is only used as a
/// type-level marker and no value of `T` is stored.
#[derive(Debug, Clone, Copy)]
pub struct NormalizationTask<'a, T: ?Sized> {
/// Components of the IRI to write and the normalization options.
common: NormalizationTaskCommon<'a>,
/// Marker for the output string type.
_spec: PhantomData<fn() -> T>,
}
impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationTask<'a, RiStr<S>> {
fn from(iri: &'a RiStr<S>) -> Self {
let components = RiReferenceComponents::<S>::from(iri.as_ref());
let (scheme, authority, path, query, fragment) = components.to_major();
let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`");
let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path));
let common = NormalizationTaskCommon {
scheme,
authority,
path,
query,
fragment,
op: NormalizationOp {
case_pct_normalization: true,
whatwg_serialization: false,
},
};
common.into()
}
}
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationTask<'a, RiStr<S>> {
    /// Creates a normalization task from an owned IRI string by delegating to
    /// the conversion from the borrowed slice type.
    #[inline]
    fn from(iri: &'a RiString<S>) -> Self {
        let borrowed: &'a RiStr<S> = iri.as_slice();
        Self::from(borrowed)
    }
}
impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationTask<'a, RiAbsoluteStr<S>> {
fn from(iri: &'a RiAbsoluteStr<S>) -> Self {
let components = RiReferenceComponents::<S>::from(iri.as_ref());
let (scheme, authority, path, query, fragment) = components.to_major();
let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`");
let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path));
let common = NormalizationTaskCommon {
scheme,
authority,
path,
query,
fragment,
op: NormalizationOp {
case_pct_normalization: true,
whatwg_serialization: false,
},
};
common.into()
}
}
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationTask<'a, RiAbsoluteStr<S>> {
    /// Creates a normalization task from an owned absolute IRI string by
    /// delegating to the conversion from the borrowed slice type.
    #[inline]
    fn from(iri: &'a RiAbsoluteString<S>) -> Self {
        let borrowed: &'a RiAbsoluteStr<S> = iri.as_slice();
        Self::from(borrowed)
    }
}
impl<'a, T: ?Sized + AsRef<str>> NormalizationTask<'a, T> {
    /// Enables case normalization and percent-encoding normalization.
    #[inline]
    pub fn enable_normalization(&mut self) {
        self.common.op.case_pct_normalization = true;
    }

    /// Enables WHATWG-style serialization.
    #[inline]
    pub fn enable_whatwg_serialization(&mut self) {
        self.common.op.whatwg_serialization = true;
    }

    /// Runs the task and writes the result to the given buffer.
    ///
    /// Returns the bytes that this call appended to the buffer.
    fn write_to_buf<'b, B: Buffer<'b>, S: Spec>(&self, buf: B) -> Result<&'b [u8], TaskError<Error>>
    where
        TaskError<Error>: From<B::ExtendError>,
    {
        self.common.write_to_buf::<B, S>(buf)
    }

    /// Returns the estimated maximum buffer size (in bytes) required to write
    /// the result of this task.
    ///
    /// The estimation counts the scheme and its trailing `:`, the authority
    /// and its leading `//`, the query and its leading `?`, the fragment and
    /// its leading `#`, plus the maximum size reported by the path.
    #[must_use]
    pub fn estimate_max_buf_size_for_resolution(&self) -> usize {
        // The writer always emits `:` right after the scheme, so that byte
        // must be counted as well.
        // NOTE(review): this assumes the path estimation does not already
        // account for the colon; over-estimating by one byte is harmless.
        let scheme_len = self.common.scheme.len() + 1;
        let known_exact = scheme_len
            + self.common.authority.map_or(0, |s| s.len() + 2)
            + self.common.query.map_or(0, |s| s.len() + 1)
            + self.common.fragment.map_or(0, |s| s.len() + 1);
        let path_max = self.common.path.estimate_max_buf_size_for_resolution();

        known_exact + path_max
    }
}
impl<'a, T: ?Sized> From<NormalizationTaskCommon<'a>> for NormalizationTask<'a, T> {
    /// Wraps the type-agnostic task data into a task tagged with the output
    /// string type `T`.
    #[inline]
    fn from(common: NormalizationTaskCommon<'a>) -> Self {
        let _spec = PhantomData;
        Self { common, _spec }
    }
}
impl<S: Spec> ProcessAndWrite for &NormalizationTask<'_, RiStr<S>> {
    type OutputBorrowed = RiStr<S>;
    #[cfg(feature = "alloc")]
    type OutputOwned = RiString<S>;
    type ProcessError = Error;

    /// Runs the task and returns the result as a newly allocated string.
    #[cfg(feature = "alloc")]
    fn allocate_and_write(self) -> Result<Self::OutputOwned, TaskError<Self::ProcessError>> {
        let mut out = String::new();
        self.write_to_buf::<_, S>(&mut out)?;
        let iri = RiString::try_from(out).expect("[consistency] the resolved IRI must be valid");
        Ok(iri)
    }

    /// Runs the task and writes the result into the given byte slice.
    fn write_to_byte_slice(
        self,
        buf: &mut [u8],
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf::<_, S>(ByteSliceBuf::new(buf))?;
        Ok(<&RiStr<S>>::try_from(written).expect("[consistency] the resolved IRI must be valid"))
    }

    /// Runs the task and appends the result to the given string.
    ///
    /// # Panics
    ///
    /// Panics if the buffer reports an error while being extended.
    #[cfg(feature = "alloc")]
    fn append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, Self::ProcessError> {
        self.try_append_to_std_string(buf).map_err(|err| match err {
            TaskError::Buffer(e) => panic!("buffer error: {}", e),
            TaskError::Process(e) => e,
        })
    }

    /// Runs the task and appends the result to the given string, reporting
    /// buffer errors instead of panicking.
    #[cfg(feature = "alloc")]
    fn try_append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let appended = self.write_to_buf::<_, S>(buf)?;
        Ok(<&RiStr<S>>::try_from(appended).expect("[consistency] the resolved IRI must be valid"))
    }
}
impl<S: Spec> ProcessAndWrite for &NormalizationTask<'_, RiAbsoluteStr<S>> {
    type OutputBorrowed = RiAbsoluteStr<S>;
    #[cfg(feature = "alloc")]
    type OutputOwned = RiAbsoluteString<S>;
    type ProcessError = Error;

    /// Runs the task and returns the result as a newly allocated string.
    #[cfg(feature = "alloc")]
    fn allocate_and_write(self) -> Result<Self::OutputOwned, TaskError<Self::ProcessError>> {
        let mut out = String::new();
        self.write_to_buf::<_, S>(&mut out)?;
        let iri =
            RiAbsoluteString::try_from(out).expect("[consistency] the resolved IRI must be valid");
        Ok(iri)
    }

    /// Runs the task and writes the result into the given byte slice.
    fn write_to_byte_slice(
        self,
        buf: &mut [u8],
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf::<_, S>(ByteSliceBuf::new(buf))?;
        Ok(<&RiAbsoluteStr<S>>::try_from(written)
            .expect("[consistency] the resolved IRI must be valid"))
    }

    /// Runs the task and appends the result to the given string.
    ///
    /// # Panics
    ///
    /// Panics if the buffer reports an error while being extended.
    #[cfg(feature = "alloc")]
    fn append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, Self::ProcessError> {
        self.try_append_to_std_string(buf).map_err(|err| match err {
            TaskError::Buffer(e) => panic!("buffer error: {}", e),
            TaskError::Process(e) => e,
        })
    }

    /// Runs the task and appends the result to the given string, reporting
    /// buffer errors instead of panicking.
    #[cfg(feature = "alloc")]
    fn try_append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let appended = self.write_to_buf::<_, S>(buf)?;
        Ok(<&RiAbsoluteStr<S>>::try_from(appended)
            .expect("[consistency] the resolved IRI must be valid"))
    }
}
/// Output-type-agnostic data of a normalization/resolution task: the IRI
/// components to write and the normalization options.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizationTaskCommon<'a> {
/// Scheme, without the trailing `:` (the writer appends it).
pub(crate) scheme: &'a str,
/// Authority, without the leading `//` (the writer prepends it), if any.
pub(crate) authority: Option<&'a str>,
/// Path, either ready to be written as is or still to be normalized.
pub(crate) path: Path<'a>,
/// Query, without the leading `?` (the writer prepends it), if any.
pub(crate) query: Option<&'a str>,
/// Fragment, without the leading `#` (the writer prepends it), if any.
pub(crate) fragment: Option<&'a str>,
/// Normalization steps to apply while writing.
pub(crate) op: NormalizationOp,
}
impl<'a> NormalizationTaskCommon<'a> {
    /// Writes the (optionally normalized) IRI to the given buffer.
    ///
    /// Components are written in order: scheme, `:`, `//` + authority (if
    /// any), path, `?` + query (if any), `#` + fragment (if any).
    ///
    /// Returns the bytes appended to the buffer by this call; content that was
    /// already in the buffer is not included.
    ///
    /// # Errors
    ///
    /// * Returns a buffer error if the buffer cannot be extended.
    /// * Returns a process error if the result has no authority but its path
    ///   starts with `//`, unless WHATWG serialization is enabled.
    fn write_to_buf<'b, B: Buffer<'b>, S: Spec>(
        &self,
        mut buf: B,
    ) -> Result<&'b [u8], TaskError<Error>>
    where
        TaskError<Error>: From<B::ExtendError>,
    {
        // Remember where the output starts so that only the newly written
        // part is returned.
        let buf_offset = buf.as_bytes().len();

        // Scheme.
        if self.op.case_pct_normalization {
            buf.extend_chars(self.scheme.chars().map(|c| c.to_ascii_lowercase()))?;
        } else {
            buf.push_str(self.scheme)?;
        }
        buf.push_str(":")?;

        // Authority.
        if let Some(authority) = self.authority {
            buf.push_str("//")?;
            if self.op.case_pct_normalization {
                // Userinfo (if present) is split off at the last `@`.
                let host_port = match rfind_split_hole(authority, b'@') {
                    Some((userinfo, host_port)) => {
                        buf.extend_chars(normalize_case_and_pct_encodings::<S>(userinfo))?;
                        buf.push_str("@")?;
                        host_port
                    }
                    None => authority,
                };
                if is_ascii_only_host(host_port) {
                    // Lowercase plain characters, but write percent-encoded
                    // triplets as they come out of the normalizer.
                    let mut chars = normalize_case_and_pct_encodings::<S>(host_port);
                    loop {
                        // `take_while` consumes the terminating `%` itself, so
                        // the next two characters (if any) are the hexdigits.
                        buf.extend_chars(
                            chars
                                .by_ref()
                                .take_while(|c| *c != '%')
                                .map(|c| c.to_ascii_lowercase()),
                        )?;
                        let pct_upper = match chars.next() {
                            Some(v) => v,
                            // No more characters: the whole host is written.
                            None => break,
                        };
                        let pct_lower = chars.next().expect(
                            "[validity] valid IRI must have following two hexdigits after `%`",
                        );
                        debug_assert!(
                            !pct_upper.is_ascii_lowercase() && !pct_lower.is_ascii_lowercase(),
                            "[consistency] percent-encoded triplets should be \
                             normalized to uppercase"
                        );
                        buf.extend_chars(['%', pct_upper, pct_lower])?;
                    }
                } else {
                    buf.extend_chars(normalize_case_and_pct_encodings::<S>(host_port))?;
                }
            } else {
                buf.push_str(authority)?;
            }
        }

        // Path.
        let path_start_pos = buf.as_bytes().len();
        match self.path {
            Path::Done(s) => {
                buf.push_str(s)?;
            }
            Path::NeedsProcessing(path) => {
                path.normalize::<_, S>(&mut buf, self.op)?;
            }
        }

        // Without an authority, a path starting with `//` would be read back
        // as an authority, so the result is rejected (unless the WHATWG
        // serialization is requested).
        if self.authority.is_none()
            && buf.as_bytes()[path_start_pos..].starts_with(b"//")
            && !self.op.whatwg_serialization
        {
            return Err(TaskError::Process(Error::new()));
        }

        // Query.
        if let Some(query) = self.query {
            buf.push_str("?")?;
            if self.op.case_pct_normalization {
                buf.extend_chars(normalize_case_and_pct_encodings::<S>(query))?;
            } else {
                buf.push_str(query)?;
            }
        }

        // Fragment.
        if let Some(fragment) = self.fragment {
            buf.push_str("#")?;
            if self.op.case_pct_normalization {
                buf.extend_chars(normalize_case_and_pct_encodings::<S>(fragment))?;
            } else {
                buf.push_str(fragment)?;
            }
        }

        Ok(&buf.into_bytes()[buf_offset..])
    }
}
/// Returns an iterator of the characters of `i` after percent-encoding
/// normalization, obtained by flattening the normalized fragments.
pub(crate) fn normalize_case_and_pct_encodings<S: Spec>(
    i: &str,
) -> core::iter::Flatten<percent_encoding::PctNormalizedFragments<'_, S>> {
    let fragments: percent_encoding::PctNormalizedFragments<'_, S> =
        percent_encoding::PctNormalizedFragments::new(i);
    fragments.flatten()
}
#[cfg(test)]
mod tests {
    #[cfg(feature = "alloc")]
    use crate::types::{IriAbsoluteStr, IriReferenceStr, IriStr, UriStr};

    /// Test cases for normalization.
    ///
    /// Each entry is `(expected, sources, different_iris)`:
    ///
    /// * `expected`: the normalized IRI,
    /// * `sources`: IRIs that must be normalized to `expected`,
    /// * `different_iris`: IRIs that must NOT be normalized to `expected`.
    #[cfg(feature = "alloc")]
    const CASES: &[(&str, &[&str], &[&str])] = &[
        (
            "https://example.com/pa/th?query#frag",
            &["https://example.com/pa/th?query#frag"],
            &[],
        ),
        (
            "https://example.com/pA/Th?Query#Frag",
            &["HTTPs://EXaMPLE.COM/pA/Th?Query#Frag"],
            &[
                "https://example.com/pa/th?Query#Frag",
                "https://example.com/pA/Th?query#Frag",
                "https://example.com/pA/Th?Query#frag",
            ],
        ),
        (
            "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
            &[
                "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
                "URN:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
            ],
            &[
                "urn:UUID:7f1450df-6678-465b-a881-188f9b6ec822",
                "urn:uuid:7F1450DF-6678-465B-A881-188F9B6EC822",
            ],
        ),
        (
            "http://example.com/a/b/d/e",
            &[
                "http://example.com/a/b/c/%2e%2e/d/e",
                "http://example.com/a/b/c/%2E%2E/d/e",
                "http://example.com/a/b/c/../d/e",
                "http://example.com/a/b/c/%2E%2e/d/e",
                "http://example.com/a/b/c/.%2e/d/e",
                "http://example.com/a/./././././b/c/.%2e/d/e",
            ],
            &[],
        ),
        (
            "http://example.com/~Ascii%21",
            &["http://example.com/%7E%41%73%63%69%69%21"],
            &[],
        ),
    ];

    /// Checks every entry of `CASES`, including idempotence of normalization
    /// on the expected (already normalized) IRI.
    #[test]
    #[cfg(feature = "alloc")]
    fn normalize() {
        for (expected, sources, different_iris) in CASES {
            let expected = IriStr::new(*expected).expect("must be a valid IRI");

            assert_eq!(
                expected
                    .try_normalize()
                    .expect("normalized IRI must be normalizable"),
                expected,
                "IRI normalization must be idempotent"
            );

            for src in *sources {
                let src = IriStr::new(*src).expect("must be a valid IRI");
                let normalized = src.try_normalize().expect("should be normalizable");
                assert_eq!(normalized, expected);
            }

            for different in *different_iris {
                let different = IriStr::new(*different).expect("must be a valid IRI");
                let normalized = different.try_normalize().expect("should be normalizable");
                assert_ne!(
                    normalized, expected,
                    "{:?} should not be normalized to {:?}",
                    different, expected
                );
            }
        }
    }

    /// Percent-encoded non-ASCII bytes must stay percent-encoded in a URI.
    #[test]
    #[cfg(feature = "alloc")]
    fn normalize_percent_encoded_non_ascii_in_uri() {
        let uri = UriStr::new("http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1")
            .expect("must be a valid URI");
        let normalized = uri.try_normalize().expect("should be normalizable");
        assert_eq!(normalized, "http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1");
    }

    /// Percent-encoded valid UTF-8 sequences of unreserved characters are
    /// decoded in an IRI, while stray bytes stay percent-encoded.
    #[test]
    #[cfg(feature = "alloc")]
    fn normalize_percent_encoded_non_ascii_in_iri() {
        let iri = IriStr::new("http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1")
            .expect("must be a valid IRI");
        let normalized = iri.try_normalize().expect("should be normalizable");
        assert_eq!(
            normalized, "http://example.com/?a=\u{03B1}&b=%CE\u{03B1}%B1",
            "U+03B1 is an unreserved character"
        );
    }

    /// Plain resolution must not apply case or percent-encoding normalization.
    #[test]
    #[cfg(feature = "alloc")]
    fn resolution_without_normalization() {
        let iri_base =
            IriAbsoluteStr::new("HTTP://%55%73%65%72:%50%61%73%73@EXAMPLE.COM/path/PATH/%ce%b1%ff")
                .expect("must be a valid IRI");
        let iri: &IriReferenceStr = iri_base.as_ref();
        let normalized = iri
            .try_resolve_against(iri_base)
            .expect("should produce valid result");
        assert_eq!(
            &*normalized,
            "HTTP://%55%73%65%72:%50%61%73%73@EXAMPLE.COM/path/PATH/%ce%b1%ff"
        );
    }

    /// Resolution with normalization must normalize every component.
    #[test]
    #[cfg(feature = "alloc")]
    fn resolution_with_normalization() {
        let iri_base =
            IriAbsoluteStr::new("HTTP://%55%73%65%72:%50%61%73%73@EXAMPLE.COM/path/PATH/%ce%b1%ff")
                .expect("must be a valid IRI");
        let iri: &IriReferenceStr = iri_base.as_ref();
        let normalized = iri
            .try_resolve_normalize_against(iri_base)
            .expect("should produce valid result");
        assert_eq!(
            &*normalized,
            "http://User:Pass@example.com/path/PATH/\u{03B1}%FF"
        );
    }

    /// A host with percent-encoded non-ASCII bytes must not be lowercased.
    #[test]
    #[cfg(feature = "alloc")]
    fn normalize_non_ascii_only_host() {
        let uri = UriStr::new("SCHEME://Alpha%ce%b1/").expect("must be a valid URI");
        let normalized = uri.try_normalize().expect("should be normalizable");
        assert_eq!(normalized, "scheme://Alpha%CE%B1/");
    }

    /// An ASCII-only host is lowercased, but the hexdigits of percent-encoded
    /// triplets are uppercased.
    #[test]
    #[cfg(feature = "alloc")]
    fn normalize_host_with_sub_delims() {
        let uri = UriStr::new("SCHEME://PLUS%2bPLUS/").expect("must be a valid URI");
        let normalized = uri.try_normalize().expect("should be normalizable");
        assert_eq!(
            normalized, "scheme://plus%2Bplus/",
            "hexdigits in percent-encoding triplets should be normalized to uppercase"
        );
    }

    /// WHATWG-style normalization must produce a valid result for a path that
    /// would otherwise start with `//`, and must be idempotent.
    #[test]
    #[cfg(feature = "alloc")]
    fn whatwg_normalization() {
        let uri = UriStr::new("scheme:..///not-a-host").expect("must be a valid URI");
        assert!(!uri.is_normalized_whatwg());

        let normalized = uri.try_normalize_whatwg().expect("cannot allocate memory");
        assert_eq!(normalized, "scheme:/.//not-a-host");
        assert!(normalized.is_normalized_whatwg());

        // Normalize the already-normalized result (not the original input) to
        // actually verify idempotence.
        let normalized_again = normalized
            .try_normalize_whatwg()
            .expect("cannot allocate memory");
        assert_eq!(normalized_again, normalized);
        assert!(normalized_again.is_normalized_whatwg());
    }
}