pub(crate) mod authority;
use core::cmp::Ordering;
use core::num::NonZeroUsize;
use crate::components::{RiReferenceComponents, Splitter};
use crate::format::eq_str_display;
use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode};
use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole};
use crate::spec::Spec;
use crate::types::RiReferenceStr;
/// Splits off the `scheme` of an absolute IRI at the `:` located by
/// `find_split_hole`.
///
/// Returns `(rest, scheme)`: the text after the colon first, then the text
/// before it. The colon itself is in neither part.
///
/// # Panics
///
/// Panics if `find_split_hole` reports no `:` in `i`.
#[must_use]
fn scheme_colon(i: &str) -> (&str, &str) {
    find_split_hole(i, b':')
        .map(|(scheme, rest)| (rest, scheme))
        .expect("[precondition] absolute IRIs must have `scheme` part")
}
/// Splits off a leading `scheme` and its colon when one is present.
///
/// `find_split4_hole` is asked to split at one of `:`, `/`, `?` or `#`; a
/// scheme is reported only when the delimiter it returns is `:`.
///
/// Returns `(rest, Some(scheme))` in that case, and `(i, None)` with the input
/// untouched otherwise.
#[must_use]
fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) {
    if let Some((scheme, b':', rest)) = find_split4_hole(i, b':', b'/', b'?', b'#') {
        (rest, Some(scheme))
    } else {
        (i, None)
    }
}
/// Consumes a leading `//` and the authority that follows it, if any.
///
/// Returns `(rest, Some(authority))` when `i` starts with `//`, where the
/// authority ends at the `/`, `?` or `#` located by `find_split3` (or at the
/// end of the string when there is none, in which case `rest` is empty).
/// Returns `(i, None)` when `i` does not start with `//`.
#[must_use]
fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) {
    if let Some(after_slashes) = i.strip_prefix("//") {
        // Without a terminator, everything after `//` is the authority.
        let (authority, rest) =
            find_split3(after_slashes, b'/', b'?', b'#').unwrap_or((after_slashes, ""));
        (rest, Some(authority))
    } else {
        (i, None)
    }
}
/// Splits `i` at the `?` or `#` located by `find_split2`.
///
/// Returns `(rest, before_query)`. When no such delimiter is found, `rest` is
/// empty and `before_query` is the whole input.
#[must_use]
fn until_query(i: &str) -> (&str, &str) {
    let (before_query, rest) = find_split2(i, b'?', b'#').unwrap_or((i, ""));
    (rest, before_query)
}
/// Splits a string of the form `[?query][#fragment]` into its two parts.
///
/// Returns `(query, fragment)`, each without its leading `?` / `#`.
///
/// The input is expected to be empty or to start with `?` or `#`; any other
/// leading byte trips a debug assertion and is otherwise treated like `#`.
#[must_use]
fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) {
    let lead = match i.bytes().next() {
        Some(byte) => byte,
        None => return (None, None),
    };

    if lead != b'?' {
        // No query: the remainder is the fragment.
        debug_assert_eq!(lead, b'#');
        return (None, Some(&i[1..]));
    }

    let after_mark = &i[1..];
    if let Some((query, fragment)) = find_split_hole(after_mark, b'#') {
        (Some(query), Some(fragment))
    } else {
        (Some(after_mark), None)
    }
}
/// Decomposes an IRI reference into its components.
///
/// The returned value borrows `i` and carries a `Splitter` holding the
/// component boundaries as byte offsets into the string.
#[must_use]
pub(crate) fn decompose_iri_reference<S: Spec>(
    i: &RiReferenceStr<S>,
) -> RiReferenceComponents<'_, S> {
    /// Computes the component boundaries; not generic over `S`.
    fn decompose(whole: &str) -> Splitter {
        let total = whole.len();

        // Scheme: its end offset equals its length since it starts at 0.
        let (after_scheme, scheme) = scheme_colon_opt(whole);
        let scheme_end = scheme.and_then(|s| NonZeroUsize::new(s.len()));

        // Authority: starts right after the two bytes of `//`.
        let authority_start = total - after_scheme.len() + 2;
        let (after_authority, authority) = slash_slash_authority_opt(after_scheme);
        let authority_end = authority.and_then(|a| NonZeroUsize::new(authority_start + a.len()));

        // Skip the path; `tail` is empty or starts with `?` / `#`.
        let (tail, _path) = until_query(after_authority);

        // Offset just past the first `?` / `#`. `wrapping_add` keeps this
        // from panicking in the degenerate `usize::MAX` case.
        let after_first_prefix = NonZeroUsize::new((total - tail.len()).wrapping_add(1));

        let (query_start, fragment_start) = match decompose_query_and_fragment(tail) {
            (Some(_query), Some(fragment)) => (
                after_first_prefix,
                NonZeroUsize::new(total - fragment.len()),
            ),
            (Some(_query), None) => (after_first_prefix, None),
            (None, Some(_fragment)) => (None, after_first_prefix),
            (None, None) => (None, None),
        };

        Splitter::new(scheme_end, authority_end, query_start, fragment_start)
    }

    let splitter = decompose(i.as_str());
    RiReferenceComponents { iri: i, splitter }
}
/// Returns the `scheme` of an IRI reference, or `None` if it has none.
#[inline]
#[must_use]
pub(crate) fn extract_scheme(i: &str) -> Option<&str> {
    let (_rest, scheme) = scheme_colon_opt(i);
    scheme
}
/// Returns the `scheme` of an absolute IRI.
///
/// # Panics
///
/// Panics (via `scheme_colon`) if the string has no `:`.
#[inline]
#[must_use]
pub(crate) fn extract_scheme_absolute(i: &str) -> &str {
    let (_rest, scheme) = scheme_colon(i);
    scheme
}
/// Returns the `authority` of an IRI reference, or `None` if it has none.
///
/// An optional scheme is skipped first.
#[inline]
#[must_use]
pub(crate) fn extract_authority(i: &str) -> Option<&str> {
    let after_scheme = scheme_colon_opt(i).0;
    let (_rest, authority) = slash_slash_authority_opt(after_scheme);
    authority
}
/// Returns the `authority` of an absolute IRI, or `None` if it has none.
///
/// # Panics
///
/// Panics (via `scheme_colon`) if the string has no `:`.
#[inline]
#[must_use]
pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> {
    let after_scheme = scheme_colon(i).0;
    let (_rest, authority) = slash_slash_authority_opt(after_scheme);
    authority
}
/// Returns the `authority` of a relative IRI, or `None` if it has none.
///
/// No scheme is skipped: the authority is looked for at the very start.
#[inline]
#[must_use]
pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> {
    let (_rest, authority) = slash_slash_authority_opt(i);
    authority
}
/// Returns the `path` of an IRI reference.
///
/// An optional scheme and an optional authority are skipped; the path is what
/// remains before the query or fragment.
#[inline]
#[must_use]
pub(crate) fn extract_path(i: &str) -> &str {
    let after_scheme = scheme_colon_opt(i).0;
    let after_authority = slash_slash_authority_opt(after_scheme).0;
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `path` of an absolute IRI.
///
/// The scheme and an optional authority are skipped; the path is what
/// remains before the query or fragment.
///
/// # Panics
///
/// Panics (via `scheme_colon`) if the string has no `:`.
#[inline]
#[must_use]
pub(crate) fn extract_path_absolute(i: &str) -> &str {
    let after_scheme = scheme_colon(i).0;
    let after_authority = slash_slash_authority_opt(after_scheme).0;
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `path` of a relative IRI.
///
/// An optional authority is skipped; the path is what remains before the
/// query or fragment.
#[inline]
#[must_use]
pub(crate) fn extract_path_relative(i: &str) -> &str {
    let after_authority = slash_slash_authority_opt(i).0;
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `query` of an IRI reference (without the leading `?`), or
/// `None` if it has none.
#[inline]
#[must_use]
pub(crate) fn extract_query(i: &str) -> Option<&str> {
    let tail = until_query(i).0;
    let (query, _fragment) = decompose_query_and_fragment(tail);
    query
}
/// Returns the `query` of an `absolute-IRI` string (without the leading `?`),
/// or `None` if it has none.
///
/// Everything after the first delimiter is taken as the query, so the input
/// is expected to have no fragment; a debug assertion checks that the
/// delimiter is `?`.
#[must_use]
pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> {
    let (tail, _before_query) = until_query(i);
    if tail.is_empty() {
        return None;
    }

    debug_assert_eq!(
        tail.as_bytes().first(),
        Some(&b'?'),
        "`absolute-IRI` string must not have `fragment part"
    );
    Some(&tail[1..])
}
/// Splits an IRI string into the part before the fragment and the fragment.
///
/// Returns `(prefix, Some(fragment))` when `find_split_hole` locates a `#`
/// (the `#` is in neither part), and `(iri, None)` otherwise.
#[inline]
#[must_use]
pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) {
    find_split_hole(iri, b'#')
        .map_or((iri, None), |(prefix, fragment)| (prefix, Some(fragment)))
}
/// Returns the `fragment` of an IRI string (without the leading `#`), or
/// `None` if it has none.
#[inline]
#[must_use]
pub(crate) fn extract_fragment(iri: &str) -> Option<&str> {
    let (_prefix, fragment) = split_fragment(iri);
    fragment
}
/// Returns `true` if the given absolute IRI is already normalized.
///
/// The checks, in order:
///
/// 1. The scheme has no ASCII uppercase letters.
/// 2. If there is an authority, its host is normalized: an ASCII-only host
///    (per `is_ascii_only_host`) must display identically to its
///    `NormalizedAsciiOnlyHost` form; any other host must pass
///    `is_pct_case_normalized`. The userinfo, if any, must also pass
///    `is_pct_case_normalized`.
/// 3. The path has no `.` or `..` segments. Without an authority this depends
///    on `mode`:
///    - `Default`: a leading `/.//` is ignored before the check;
///    - `Rfc3986`: the whole path is checked;
///    - `PreserveAuthoritylessRelativePath`: paths starting with `/` are
///      handled like `Default`, other paths are not checked at all.
/// 4. Everything after the authority passes `is_pct_case_normalized`.
///
/// # Panics
///
/// Panics (via `scheme_colon`) if the string has no `:`.
#[must_use]
pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool {
    let (after_scheme, scheme) = scheme_colon(i);
    let (after_authority, authority) = slash_slash_authority_opt(after_scheme);
    let (_after_path, path) = until_query(after_authority);

    // Scheme must already be lowercase.
    if scheme.as_bytes().iter().any(u8::is_ascii_uppercase) {
        return false;
    }

    if let Some(raw_authority) = authority {
        let parts = authority::decompose_authority(raw_authority);

        let host = parts.host();
        let host_ok = if is_ascii_only_host(host) {
            eq_str_display(host, &NormalizedAsciiOnlyHost::new(host))
        } else {
            is_pct_case_normalized::<S>(host)
        };
        if !host_ok {
            return false;
        }

        let userinfo_ok = parts
            .userinfo()
            .map_or(true, |userinfo| is_pct_case_normalized::<S>(userinfo));
        if !userinfo_ok {
            return false;
        }
    }

    // Pick the part of the path that must be free of dot segments, if any.
    let dot_check_span = match (authority.is_some(), mode) {
        (true, _) | (false, NormalizednessCheckMode::Rfc3986) => Some(path),
        (false, NormalizednessCheckMode::Default) => {
            Some(path.strip_prefix("/.//").unwrap_or(path))
        }
        (false, NormalizednessCheckMode::PreserveAuthoritylessRelativePath) => {
            if path.starts_with('/') {
                Some(path.strip_prefix("/.//").unwrap_or(path))
            } else {
                // Path does not start with `/`: left unchecked.
                None
            }
        }
    };

    let has_dot_segment = dot_check_span.map_or(false, |span| {
        span.split('/').any(|segment| segment == "." || segment == "..")
    });
    if has_dot_segment {
        return false;
    }

    is_pct_case_normalized::<S>(after_authority)
}
/// Decodes two ASCII hexadecimal digits into the byte they spell.
///
/// `upper` supplies the high four bits and `lower` the low four bits.
///
/// # Preconditions
///
/// Both bytes should be ASCII hex digits (`0-9`, `A-F`, `a-f`). Other inputs
/// are not rejected; they may overflow the arithmetic below.
#[must_use]
pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 {
    /// Returns the numeric value of one hex digit.
    ///
    /// The digit class is picked from the high nibble: below `0x40` is a
    /// decimal digit, exactly `0x40` an uppercase letter, above a lowercase one.
    fn digit_value(digit: u8) -> u8 {
        let base = match (digit & 0xf0).cmp(&0x40) {
            Ordering::Less => b'0',
            Ordering::Equal => b'A' - 10,
            Ordering::Greater => b'a' - 10,
        };
        digit - base
    }

    let high = digit_value(upper);
    let low = digit_value(lower);
    (high << 4) + low
}
/// Decodes the two hex digits at the start of `s` into a byte.
///
/// Returns `(byte, rest)`, where `rest` is `s` without its first two bytes.
///
/// # Panics
///
/// Panics if `s` is shorter than two bytes, or if byte offset 2 is not a
/// character boundary.
#[must_use]
pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) {
    /// Panic message shared by both digit lookups.
    const TOO_SHORT: &str =
        "[validity] at least two bytes should follow the `%` in a valid IRI reference";

    let raw = s.as_bytes();
    let upper_xdigit = raw.first().copied().expect(TOO_SHORT);
    let lower_xdigit = raw.get(1).copied().expect(TOO_SHORT);

    (hexdigits_to_byte([upper_xdigit, lower_xdigit]), &s[2..])
}
/// Returns `true` if `host` is ASCII-only, counting percent-encoded bytes.
///
/// The host is rejected if it contains a non-ASCII character, or a
/// percent-encoded triplet that decodes to a non-ASCII byte.
///
/// # Panics
///
/// Panics (via `take_xdigits2`) if a `%` is not followed by at least two
/// bytes.
#[must_use]
pub(crate) fn is_ascii_only_host(mut host: &str) -> bool {
    loop {
        // Next character that needs a decision: `%` or anything non-ASCII.
        let hit = host
            .char_indices()
            .find(|&(_, ch)| ch == '%' || !ch.is_ascii());
        let (pos, ch) = match hit {
            Some(found) => found,
            // Only plain ASCII is left.
            None => return true,
        };

        if ch != '%' {
            debug_assert!(!ch.is_ascii());
            return false;
        }

        // Decode the triplet and continue scanning after it.
        let (decoded, remainder) = take_xdigits2(&host[(pos + 1)..]);
        if !decoded.is_ascii() {
            return false;
        }
        host = remainder;
    }
}