mod error;
mod path;
mod pct_case;
use core::fmt::{self, Display as _, Write as _};
use core::marker::PhantomData;
use crate::buffer::{Buffer, ByteSliceBuf, FmtWritableBuffer};
use crate::components::RiReferenceComponents;
use crate::parser::str::rfind_split_hole;
use crate::parser::trusted::is_ascii_only_host;
use crate::spec::Spec;
use crate::task::{Error as TaskError, ProcessAndWrite};
use crate::types::{RiAbsoluteStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiString};
pub use self::error::Error;
pub(crate) use self::path::{Path, PathToNormalize};
pub(crate) use self::pct_case::{
is_pct_case_normalized, DisplayNormalizedAsciiOnlyHost, DisplayPctCaseNormalize,
};
/// Switches selecting which normalization steps are applied when an IRI is
/// written out.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct NormalizationOp {
/// Whether the scheme, authority, query, and fragment are passed through
/// the case / percent-encoding normalizers.
///
/// When `false`, those components are written verbatim. The whole
/// operation value is also forwarded to the path writer.
pub(crate) case_pct_normalization: bool,
}
/// Borrowed components of an IRI together with the operations to apply when
/// writing it out.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizationInput<'a> {
/// Scheme, without the `:` delimiter (the writer emits it).
scheme: &'a str,
/// Authority, without the leading `//` (the writer emits it), if any.
authority: Option<&'a str>,
/// Path, either already finished or still to be processed.
path: Path<'a>,
/// Query, without the leading `?` (the writer emits it), if any.
query: Option<&'a str>,
/// Fragment, without the leading `#` (the writer emits it), if any.
fragment: Option<&'a str>,
/// Normalization steps to apply while writing.
op: NormalizationOp,
}
/// Splits an IRI into its major components.
///
/// The path is marked as still needing processing, and case /
/// percent-encoding normalization is left disabled.
impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationInput<'a> {
    fn from(iri: &'a RiStr<S>) -> Self {
        let (scheme, authority, raw_path, query, fragment) =
            RiReferenceComponents::<S>::from(iri.as_ref()).to_major();

        Self {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(raw_path)),
            query,
            fragment,
            op: NormalizationOp {
                case_pct_normalization: false,
            },
        }
    }
}
/// Delegates to the conversion from the borrowed IRI slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationInput<'a> {
    #[inline]
    fn from(iri: &'a RiString<S>) -> Self {
        let borrowed: &'a RiStr<S> = iri.as_slice();
        Self::from(borrowed)
    }
}
/// Splits an absolute IRI into its major components.
///
/// The path is marked as still needing processing, and case /
/// percent-encoding normalization is left disabled.
impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationInput<'a> {
    fn from(iri: &'a RiAbsoluteStr<S>) -> Self {
        let (scheme, authority, raw_path, query, fragment) =
            RiReferenceComponents::<S>::from(iri.as_ref()).to_major();

        Self {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(raw_path)),
            query,
            fragment,
            op: NormalizationOp {
                case_pct_normalization: false,
            },
        }
    }
}
/// Delegates to the conversion from the borrowed absolute IRI slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationInput<'a> {
    #[inline]
    fn from(iri: &'a RiAbsoluteString<S>) -> Self {
        let borrowed: &'a RiAbsoluteStr<S> = iri.as_slice();
        Self::from(borrowed)
    }
}
impl NormalizationInput<'_> {
    /// Checks whether this input can be normalized under RFC 3986 rules.
    ///
    /// Only an input that has no authority *and* a path that still needs
    /// processing is actually inspected; every other combination is accepted
    /// immediately.
    pub(crate) fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
        if let (None, Path::NeedsProcessing(pending)) = (self.authority, self.path) {
            return pending.ensure_rfc3986_normalizable_with_authority_absent();
        }
        Ok(())
    }
}
/// Writer wrapper whose `Display` output is the (optionally normalized) IRI
/// described by `input`, interpreted under spec `S`.
struct DisplayNormalize<'a, S> {
/// Components to write and the operations to apply to them.
input: NormalizationInput<'a>,
/// Marker carrying the spec type without storing a value of it.
_spec: PhantomData<fn() -> S>,
}
/// Debug output shows only the `input` field; the spec marker is omitted.
impl<S: Spec> fmt::Debug for DisplayNormalize<'_, S> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut repr = f.debug_struct("DisplayNormalize");
        repr.field("input", &self.input);
        repr.finish()
    }
}
impl<'a, S: Spec> DisplayNormalize<'a, S> {
#[inline]
#[must_use]
fn from_input(input: NormalizationInput<'a>) -> Self {
Self {
input,
_spec: PhantomData,
}
}
}
impl<S: Spec> fmt::Display for DisplayNormalize<'_, S> {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.input.op.case_pct_normalization {
normalize_scheme(f, self.input.scheme)?;
} else {
f.write_str(self.input.scheme)?;
}
f.write_str(":")?;
if let Some(authority) = self.input.authority {
f.write_str("//")?;
if self.input.op.case_pct_normalization {
normalize_authority::<S>(f, authority)?;
} else {
f.write_str(authority)?;
}
}
match self.input.path {
Path::Done(s) => f.write_str(s)?,
Path::NeedsProcessing(path) => {
path.fmt_write_normalize::<S, _>(f, self.input.op, self.input.authority.is_some())?
}
}
if let Some(query) = self.input.query {
f.write_char('?')?;
if self.input.op.case_pct_normalization {
normalize_query::<S>(f, query)?;
} else {
f.write_str(query)?;
}
}
if let Some(fragment) = self.input.fragment {
f.write_char('#')?;
if self.input.op.case_pct_normalization {
normalize_fragment::<S>(f, fragment)?;
} else {
f.write_str(fragment)?;
}
}
Ok(())
}
}
/// Writes the scheme with every ASCII letter lowercased.
///
/// Non-ASCII characters, if any, are written unchanged.
pub(crate) fn normalize_scheme(f: &mut fmt::Formatter<'_>, scheme: &str) -> fmt::Result {
    for ch in scheme.chars() {
        f.write_char(ch.to_ascii_lowercase())?;
    }
    Ok(())
}
/// Writes the normalized authority.
///
/// If the authority contains `@`, the part before the split point is written
/// with percent-encoding / case normalization, followed by `@`; the remainder
/// (or the whole authority when there is no `@`) is written as host-and-port.
fn normalize_authority<S: Spec>(f: &mut fmt::Formatter<'_>, authority: &str) -> fmt::Result {
    let host_port = if let Some((userinfo, rest)) = rfind_split_hole(authority, b'@') {
        DisplayPctCaseNormalize::<S>::new(userinfo).fmt(f)?;
        f.write_char('@')?;
        rest
    } else {
        authority
    };

    normalize_host_port::<S>(f, host_port)
}
/// Writes the normalized host and port.
///
/// A single trailing `:` (a port delimiter with nothing after it) is dropped
/// first. The remainder is written with the ASCII-only host normalizer when
/// it qualifies as an ASCII-only host, and with the generic percent-encoding /
/// case normalizer otherwise.
pub(crate) fn normalize_host_port<S: Spec>(
    f: &mut fmt::Formatter<'_>,
    host_port: &str,
) -> fmt::Result {
    let trimmed = match host_port.strip_suffix(':') {
        Some(without_colon) => without_colon,
        None => host_port,
    };

    if !is_ascii_only_host(trimmed) {
        return DisplayPctCaseNormalize::<S>::new(trimmed).fmt(f);
    }
    DisplayNormalizedAsciiOnlyHost::new(trimmed).fmt(f)
}
/// Writes the query with percent-encoding / case normalization applied.
pub(crate) fn normalize_query<S: Spec>(f: &mut fmt::Formatter<'_>, query: &str) -> fmt::Result {
    let normalized = DisplayPctCaseNormalize::<S>::new(query);
    fmt::Display::fmt(&normalized, f)
}
/// Writes the fragment with percent-encoding / case normalization applied.
pub(crate) fn normalize_fragment<S: Spec>(
    f: &mut fmt::Formatter<'_>,
    fragment: &str,
) -> fmt::Result {
    let normalized = DisplayPctCaseNormalize::<S>::new(fragment);
    fmt::Display::fmt(&normalized, f)
}
/// A pending normalization job that produces a string of type `T`.
#[derive(Debug, Clone, Copy)]
pub struct NormalizationTask<'a, T: ?Sized> {
/// Components to write and the operations to apply to them.
input: NormalizationInput<'a>,
/// When `true`, writing skips the RFC 3986 normalizability pre-check.
whatwg_serialization: bool,
/// Marker carrying the output string type without storing a value of it.
_ty_str: PhantomData<fn() -> T>,
}
impl<'a, T: ?Sized> NormalizationTask<'a, T> {
#[inline]
#[must_use]
pub(crate) fn new(
scheme: &'a str,
authority: Option<&'a str>,
path: Path<'a>,
query: Option<&'a str>,
fragment: Option<&'a str>,
op: NormalizationOp,
whatwg_serialization: bool,
) -> Self {
Self {
input: NormalizationInput {
scheme,
authority,
path,
query,
fragment,
op,
},
whatwg_serialization,
_ty_str: PhantomData,
}
}
}
impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationTask<'a, RiStr<S>> {
fn from(iri: &'a RiStr<S>) -> Self {
let components = RiReferenceComponents::<S>::from(iri.as_ref());
let (scheme, authority, path, query, fragment) = components.to_major();
let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`");
let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path));
Self {
input: NormalizationInput {
scheme,
authority,
path,
query,
fragment,
op: NormalizationOp {
case_pct_normalization: true,
},
},
whatwg_serialization: false,
_ty_str: PhantomData,
}
}
}
/// Delegates to the conversion from the borrowed IRI slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationTask<'a, RiStr<S>> {
    #[inline]
    fn from(iri: &'a RiString<S>) -> Self {
        let borrowed: &'a RiStr<S> = iri.as_slice();
        Self::from(borrowed)
    }
}
impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationTask<'a, RiAbsoluteStr<S>> {
fn from(iri: &'a RiAbsoluteStr<S>) -> Self {
let components = RiReferenceComponents::<S>::from(iri.as_ref());
let (scheme, authority, path, query, fragment) = components.to_major();
let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`");
let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path));
Self {
input: NormalizationInput {
scheme,
authority,
path,
query,
fragment,
op: NormalizationOp {
case_pct_normalization: true,
},
},
whatwg_serialization: false,
_ty_str: PhantomData,
}
}
}
/// Delegates to the conversion from the borrowed absolute IRI slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationTask<'a, RiAbsoluteStr<S>> {
    #[inline]
    fn from(iri: &'a RiAbsoluteString<S>) -> Self {
        let borrowed: &'a RiAbsoluteStr<S> = iri.as_slice();
        Self::from(borrowed)
    }
}
impl<'a, T: ?Sized + AsRef<str>> NormalizationTask<'a, T> {
    /// Enables case and percent-encoding normalization for this task.
    #[inline]
    pub fn enable_normalization(&mut self) {
        self.input.op.case_pct_normalization = true;
    }

    /// Enables WHATWG-style serialization for this task.
    ///
    /// With this enabled, writing no longer performs the RFC 3986
    /// normalizability pre-check.
    #[inline]
    pub fn enable_whatwg_serialization(&mut self) {
        self.whatwg_serialization = true;
    }

    /// Writes the result to the given buffer and returns the newly written bytes.
    ///
    /// Content already present in the buffer is kept; only the bytes appended
    /// by this call are returned.
    ///
    /// # Errors
    ///
    /// Returns `TaskError::Process` if WHATWG serialization is disabled and
    /// the input fails the RFC 3986 normalizability check. If formatting into
    /// the buffer fails, the error recorded by the buffer writer is converted
    /// and returned.
    fn write_to_buf<'b, B: Buffer<'b>, S: Spec>(
        &self,
        mut buf: B,
    ) -> Result<&'b [u8], TaskError<Error>>
    where
        TaskError<Error>: From<B::ExtendError>,
    {
        if !self.whatwg_serialization {
            self.input
                .ensure_rfc3986_normalizable()
                .map_err(TaskError::Process)?;
        }

        // Remember where the existing content ends so that only the appended
        // part is handed back to the caller.
        let buf_offset = buf.as_bytes().len();
        let mut writer = FmtWritableBuffer::new(&mut buf);
        match write!(writer, "{}", DisplayNormalize::<S>::from_input(self.input)) {
            Ok(_) => Ok(&buf.into_bytes()[buf_offset..]),
            Err(_) => Err(writer.take_error_unwrap().into()),
        }
    }

    /// Returns an estimated upper bound of the buffer size needed to write
    /// the result.
    ///
    /// The estimate accounts for every delimiter the writer emits: the `:`
    /// after the scheme, the `//` before an authority, the `?` before a query,
    /// and the `#` before a fragment.
    #[must_use]
    pub fn estimate_max_buf_size_for_resolution(&self) -> usize {
        // `+ 1` is for the `:` that is always written after the scheme.
        // Omitting it makes the estimate one byte too small whenever
        // normalization does not shrink the input (e.g. `a:b`).
        let known_exact = self.input.scheme.len()
            + 1
            + self.input.authority.map_or(0, |s| s.len() + 2)
            + self.input.query.map_or(0, |s| s.len() + 1)
            + self.input.fragment.map_or(0, |s| s.len() + 1);
        let path_max = match &self.input.path {
            Path::Done(s) => s.len(),
            Path::NeedsProcessing(path) => path.len(),
        };

        known_exact + path_max
    }
}
/// Writing a normalization task for a generic IRI yields `RiStr` / `RiString`.
impl<S: Spec> ProcessAndWrite for &NormalizationTask<'_, RiStr<S>> {
    type OutputBorrowed = RiStr<S>;
    #[cfg(feature = "alloc")]
    type OutputOwned = RiString<S>;
    type ProcessError = Error;

    /// Writes the result into a freshly allocated string.
    #[cfg(feature = "alloc")]
    fn allocate_and_write(self) -> Result<Self::OutputOwned, TaskError<Self::ProcessError>> {
        let mut out = String::new();
        self.write_to_buf::<_, S>(&mut out)?;
        let iri = RiString::try_from(out).expect("[consistency] the resolved IRI must be valid");
        Ok(iri)
    }

    /// Writes the result into the given byte slice.
    fn write_to_byte_slice(
        self,
        buf: &mut [u8],
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf::<_, S>(ByteSliceBuf::new(buf))?;
        Ok(<&RiStr<S>>::try_from(written).expect("[consistency] the resolved IRI must be valid"))
    }

    /// Appends the result to the given string.
    ///
    /// # Panics
    ///
    /// Panics if the underlying write reports a buffer error.
    #[cfg(feature = "alloc")]
    fn append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, Self::ProcessError> {
        self.try_append_to_std_string(buf).map_err(|err| match err {
            TaskError::Buffer(e) => panic!("buffer error: {}", e),
            TaskError::Process(e) => e,
        })
    }

    /// Appends the result to the given string, reporting buffer errors
    /// instead of panicking.
    #[cfg(feature = "alloc")]
    fn try_append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf::<_, S>(buf)?;
        Ok(<&RiStr<S>>::try_from(written).expect("[consistency] the resolved IRI must be valid"))
    }
}
/// Writing a normalization task for an absolute IRI yields
/// `RiAbsoluteStr` / `RiAbsoluteString`.
impl<S: Spec> ProcessAndWrite for &NormalizationTask<'_, RiAbsoluteStr<S>> {
    type OutputBorrowed = RiAbsoluteStr<S>;
    #[cfg(feature = "alloc")]
    type OutputOwned = RiAbsoluteString<S>;
    type ProcessError = Error;

    /// Writes the result into a freshly allocated string.
    #[cfg(feature = "alloc")]
    fn allocate_and_write(self) -> Result<Self::OutputOwned, TaskError<Self::ProcessError>> {
        let mut out = String::new();
        self.write_to_buf::<_, S>(&mut out)?;
        let iri =
            RiAbsoluteString::try_from(out).expect("[consistency] the resolved IRI must be valid");
        Ok(iri)
    }

    /// Writes the result into the given byte slice.
    fn write_to_byte_slice(
        self,
        buf: &mut [u8],
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf::<_, S>(ByteSliceBuf::new(buf))?;
        Ok(<&RiAbsoluteStr<S>>::try_from(written)
            .expect("[consistency] the resolved IRI must be valid"))
    }

    /// Appends the result to the given string.
    ///
    /// # Panics
    ///
    /// Panics if the underlying write reports a buffer error.
    #[cfg(feature = "alloc")]
    fn append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, Self::ProcessError> {
        self.try_append_to_std_string(buf).map_err(|err| match err {
            TaskError::Buffer(e) => panic!("buffer error: {}", e),
            TaskError::Process(e) => e,
        })
    }

    /// Appends the result to the given string, reporting buffer errors
    /// instead of panicking.
    #[cfg(feature = "alloc")]
    fn try_append_to_std_string(
        self,
        buf: &mut String,
    ) -> Result<&Self::OutputBorrowed, TaskError<Self::ProcessError>> {
        let written = self.write_to_buf::<_, S>(buf)?;
        Ok(<&RiAbsoluteStr<S>>::try_from(written)
            .expect("[consistency] the resolved IRI must be valid"))
    }
}
/// Tests that drive `DisplayNormalize` directly with hand-built inputs.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests_display {
    use super::*;

    use crate::spec::{IriSpec, UriSpec};

    /// Builds a writer for the given components, with the path still pending
    /// and case / percent-encoding normalization enabled.
    fn normalizing<'a, S: Spec>(
        scheme: &'a str,
        authority: Option<&'a str>,
        path: PathToNormalize<'a>,
        query: Option<&'a str>,
        fragment: Option<&'a str>,
    ) -> DisplayNormalize<'a, S> {
        DisplayNormalize::from_input(NormalizationInput {
            scheme,
            authority,
            path: Path::NeedsProcessing(path),
            query,
            fragment,
            op: NormalizationOp {
                case_pct_normalization: true,
            },
        })
    }

    #[test]
    fn normalize_iri_1() {
        let path = PathToNormalize::from_paths_to_be_resolved(
            "/1/2/3/4/.././5/../6/",
            "a/b/c/d/e/f/g/h/i/../../../j/k/l/../../../../m/n/./o",
        );
        let disp = normalizing::<IriSpec>(
            "http",
            Some("user:pass@example.com:80"),
            path,
            Some("query"),
            Some("fragment"),
        );

        assert_eq!(
            disp.to_string(),
            "http://user:pass@example.com:80/1/2/3/6/a/b/c/d/e/m/n/o?query#fragment"
        );
    }

    #[test]
    fn normalize_iri_2() {
        let path = PathToNormalize::from_paths_to_be_resolved(
            "/%7e/2/beta=%CE%B2/4/.././5/../6/",
            "a/b/alpha=%CE%B1/d/e/f/g/h/i/../../../j/k/l/../../../../%3c/%7e/./%3e",
        );
        let disp = normalizing::<IriSpec>(
            "http",
            Some("user:pass@example.com:80"),
            path,
            Some("query"),
            Some("fragment"),
        );

        assert_eq!(
            disp.to_string(),
            "http://user:pass@example.com:80/~/2/beta=\u{03B2}/6/a/b/alpha=\u{03B1}/d/e/%3C/~/%3E?query#fragment"
        );
    }

    #[test]
    fn normalize_uri_1() {
        let path = PathToNormalize::from_paths_to_be_resolved(
            "/%7e/2/beta=%ce%b2/4/.././5/../6/",
            "a/b/alpha=%CE%B1/d/e/f/g/h/i/../../../j/k/l/../../../../%3c/%7e/./%3e",
        );
        let disp = normalizing::<UriSpec>(
            "http",
            Some("user:pass@example.com:80"),
            path,
            Some("query"),
            Some("fragment"),
        );

        assert_eq!(
            disp.to_string(),
            "http://user:pass@example.com:80/~/2/beta=%CE%B2/6/a/b/alpha=%CE%B1/d/e/%3C/~/%3E?query#fragment"
        );
    }

    #[test]
    fn trailing_slash_should_remain() {
        let disp = normalizing::<UriSpec>(
            "http",
            Some("example.com"),
            PathToNormalize::from_single_path("/../../"),
            None,
            None,
        );

        assert_eq!(disp.to_string(), "http://example.com/");
    }

    #[test]
    fn leading_double_slash_without_authority_whatwg() {
        let disp = normalizing::<UriSpec>(
            "scheme",
            None,
            PathToNormalize::from_paths_to_be_resolved("/a/b/", "../..//c"),
            None,
            None,
        );

        assert_eq!(disp.to_string(), "scheme:/.//c");
    }
}
/// Tests that exercise normalization through the public string types.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use crate::types::{IriAbsoluteStr, IriReferenceStr, IriStr, UriStr};

    /// Test cases, each of the form
    /// `(expected normalized IRI, sources that normalize to it, IRIs that must not)`.
    const CASES: &[(&str, &[&str], &[&str])] = &[
        (
            "https://example.com/pa/th?query#frag",
            &["https://example.com/pa/th?query#frag"],
            &[],
        ),
        (
            "https://example.com/pA/Th?Query#Frag",
            &["HTTPs://EXaMPLE.COM/pA/Th?Query#Frag"],
            &[
                "https://example.com/pa/th?Query#Frag",
                "https://example.com/pA/Th?query#Frag",
                "https://example.com/pA/Th?Query#frag",
            ],
        ),
        (
            "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
            &[
                "urn:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
                "URN:uuid:7f1450df-6678-465b-a881-188f9b6ec822",
            ],
            &[
                "urn:UUID:7f1450df-6678-465b-a881-188f9b6ec822",
                "urn:uuid:7F1450DF-6678-465B-A881-188F9B6EC822",
            ],
        ),
        (
            "http://example.com/a/b/d/e",
            &[
                "http://example.com/a/b/c/%2e%2e/d/e",
                "http://example.com/a/b/c/%2E%2E/d/e",
                "http://example.com/a/b/c/../d/e",
                "http://example.com/a/b/c/%2E%2e/d/e",
                "http://example.com/a/b/c/.%2e/d/e",
                "http://example.com/a/./././././b/c/.%2e/d/e",
            ],
            &[],
        ),
        (
            "http://example.com/~Ascii%21",
            &["http://example.com/%7E%41%73%63%69%69%21"],
            &[],
        ),
        (
            "https://example.com/",
            &["https://example.com:/"],
            &[],
        ),
    ];

    #[test]
    fn normalize() {
        for (expected, sources, different_iris) in CASES {
            let expected = IriStr::new(*expected).expect("must be a valid IRI");

            assert_eq!(
                expected
                    .try_normalize()
                    .expect("normalized IRI must be normalizable"),
                expected,
                "IRI normalization must be idempotent"
            );

            for src in *sources {
                let src = IriStr::new(*src).expect("must be a valid IRI");
                let normalized = src.try_normalize().expect("should be normalizable");
                assert_eq!(normalized, expected);
            }

            for different in *different_iris {
                let different = IriStr::new(*different).expect("must be a valid IRI");
                let normalized = different.try_normalize().expect("should be normalizable");
                assert_ne!(
                    normalized, expected,
                    "{:?} should not be normalized to {:?}",
                    different, expected
                );
            }
        }
    }

    #[test]
    fn normalize_percent_encoded_non_ascii_in_uri() {
        let uri = UriStr::new("http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1")
            .expect("must be a valid URI");
        let normalized = uri.try_normalize().expect("should be normalizable");
        assert_eq!(normalized, "http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1");
    }

    #[test]
    fn normalize_percent_encoded_non_ascii_in_iri() {
        let iri = IriStr::new("http://example.com/?a=%CE%B1&b=%CE%CE%B1%B1")
            .expect("must be a valid IRI");
        let normalized = iri.try_normalize().expect("should be normalizable");
        assert_eq!(
            normalized, "http://example.com/?a=\u{03B1}&b=%CE\u{03B1}%B1",
            "U+03B1 is an unreserved character"
        );
    }

    #[test]
    fn resolution_without_normalization() {
        let iri_base =
            IriAbsoluteStr::new("HTTP://%55%73%65%72:%50%61%73%73@EXAMPLE.COM/path/PATH/%ce%b1%ff")
                .expect("must be a valid IRI");
        let iri: &IriReferenceStr = iri_base.as_ref();
        let normalized = iri
            .try_resolve_against(iri_base)
            .expect("should produce valid result");
        assert_eq!(
            &*normalized,
            "HTTP://%55%73%65%72:%50%61%73%73@EXAMPLE.COM/path/PATH/%ce%b1%ff"
        );
    }

    #[test]
    fn resolution_with_normalization() {
        let iri_base =
            IriAbsoluteStr::new("HTTP://%55%73%65%72:%50%61%73%73@EXAMPLE.COM/path/PATH/%ce%b1%ff")
                .expect("must be a valid IRI");
        let iri: &IriReferenceStr = iri_base.as_ref();
        let normalized = iri
            .try_resolve_normalize_against(iri_base)
            .expect("should produce valid result");
        assert_eq!(
            &*normalized,
            "http://User:Pass@example.com/path/PATH/\u{03B1}%FF"
        );
    }

    #[test]
    fn normalize_non_ascii_only_host() {
        let uri = UriStr::new("SCHEME://Alpha%ce%b1/").expect("must be a valid URI");
        let normalized = uri.try_normalize().expect("should be normalizable");
        assert_eq!(normalized, "scheme://Alpha%CE%B1/");
    }

    #[test]
    fn normalize_host_with_sub_delims() {
        let uri = UriStr::new("SCHEME://PLUS%2bPLUS/").expect("must be a valid URI");
        let normalized = uri.try_normalize().expect("should be normalizable");
        assert_eq!(
            normalized, "scheme://plus%2Bplus/",
            "hexdigits in percent-encoding triplets should be normalized to uppercase"
        );
    }

    #[test]
    fn whatwg_normalization() {
        let uri = UriStr::new("scheme:..///not-a-host").expect("must be a valid URI");
        assert!(!uri.is_normalized_whatwg());

        let normalized = uri.try_normalize_whatwg().expect("cannot allocate memory");
        assert_eq!(normalized, "scheme:/.//not-a-host");
        assert!(normalized.is_normalized_whatwg());

        // Normalize the *already normalized* value: re-normalizing the
        // original `uri` would only repeat the check above and would not
        // exercise idempotence.
        let normalized_again = normalized
            .try_normalize_whatwg()
            .expect("cannot allocate memory");
        assert_eq!(normalized_again, normalized);
        assert!(normalized_again.is_normalized_whatwg());
    }
}