/// A URL recognised at the very start of an input string.
///
/// Produced by [`take_url`]. The value only borrows from the scanned input, so
/// it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct UrlMatch<'a> {
    /// The matched URL text: the prefix of the scanned input running from the
    /// scheme through the last byte of the URL body.
    pub(crate) url: &'a str,
    /// Number of bytes of the input covered by the match (equal to
    /// `url.len()` for values built by [`take_url`]).
    pub(crate) consumed: usize,
}
/// Scheme names (without the `://` separator) for forward URL detection.
///
/// NOTE(review): presumably the default `schemes` argument handed to
/// `take_url`; the call site is not in this file — confirm against callers.
pub(crate) const DEFAULT_FORWARD_SCHEMES: &[&str] = &["http", "https"];
/// Scheme names (without the `://` separator) that `back_scan_url_fragment`
/// accepts when it finds a `scheme://` marker while scanning backwards.
/// Comparison against these entries is ASCII case-insensitive.
pub(crate) const BACKWARD_URL_SCHEMES: &[&str] = &[
"http", "https", "ftp", "ftps", "git", "ssh", "file", "mailto",
];
/// Tries to read a URL of the form `scheme://body` from the start of `input`.
///
/// * `input` — text to scan; the URL must begin at byte 0.
/// * `schemes` — accepted scheme names, compared ASCII case-insensitively.
///
/// The body runs until the first ASCII space, tab, line feed, carriage return,
/// or one of `>`, `]`, `)`, `<`, `[`, or until the end of `input`.
///
/// Returns `None` when `input` does not start with one of `schemes`, when the
/// scheme is not immediately followed by `://`, or when the body is empty.
/// Otherwise returns a [`UrlMatch`] whose `url` is the matched prefix of
/// `input` and whose `consumed` is that prefix's length in bytes.
pub(crate) fn take_url<'a>(input: &'a str, schemes: &[&str]) -> Option<UrlMatch<'a>> {
    const SEPARATOR: &[u8; 3] = b"://";

    let scheme_len = take_scheme(input, schemes)?;
    let body = input
        .as_bytes()
        .get(scheme_len..)?
        .strip_prefix(SEPARATOR)?;

    // Every terminator is a single ASCII byte, so the cut point always falls
    // on a UTF-8 character boundary of `input`.
    let body_len = body
        .iter()
        .position(|&byte| {
            matches!(
                byte,
                b' ' | b'\t' | b'\n' | b'\r' | b'>' | b']' | b')' | b'<' | b'['
            )
        })
        .unwrap_or(body.len());
    if body_len == 0 {
        return None;
    }

    let consumed = scheme_len + SEPARATOR.len() + body_len;
    Some(UrlMatch {
        url: &input[..consumed],
        consumed,
    })
}
/// Measures the scheme at the start of `input`.
///
/// The candidate is the longest leading run of ASCII letters. Returns the
/// byte length of that run when it equals one of `schemes` (ASCII
/// case-insensitively); returns `None` when the run is empty or matches no
/// entry.
fn take_scheme(input: &str, schemes: &[&str]) -> Option<usize> {
    let run_len = input
        .bytes()
        .take_while(u8::is_ascii_alphabetic)
        .count();
    // An empty run never matches, even if `schemes` contained an empty entry.
    if run_len == 0 {
        return None;
    }

    // The run is pure ASCII, so `run_len` is a valid character boundary.
    let candidate = &input[..run_len];
    schemes
        .iter()
        .any(|known| candidate.eq_ignore_ascii_case(known))
        .then_some(run_len)
}
/// Reports whether the text immediately before byte offset `pos` continues a
/// URL that started with a known `scheme://` marker.
///
/// The scan walks backwards from `pos` one byte at a time:
///
/// * hitting an ASCII space, tab, line feed or carriage return yields `false`;
/// * hitting the end of a `://` sequence decides the result on the spot: the
///   run of ASCII letters directly before the `:` must be non-empty and equal
///   (ASCII case-insensitively) to an entry of `BACKWARD_URL_SCHEMES`;
/// * reaching the start of `text` without either yields `false`.
///
/// A `pos` greater than `text.len()` also yields `false`.
pub(crate) fn back_scan_url_fragment(text: &str, pos: usize) -> bool {
    let Some(head) = text.as_bytes().get(..pos) else {
        return false;
    };

    for end in (1..=head.len()).rev() {
        match &head[..end] {
            // Whitespace ends the word before any marker was seen.
            [.., b' ' | b'\t' | b'\n' | b'\r'] => return false,
            // The first `://` found (nearest to `pos`) is decisive; the scan
            // never looks further back than this.
            [before @ .., b':', b'/', b'/'] => {
                let scheme_len = before
                    .iter()
                    .rev()
                    .take_while(|byte| byte.is_ascii_alphabetic())
                    .count();
                if scheme_len == 0 {
                    return false;
                }
                // Both bounds sit on ASCII bytes, so slicing `text` is safe.
                let colon_at = before.len();
                let scheme = &text[colon_at - scheme_len..colon_at];
                return BACKWARD_URL_SCHEMES
                    .iter()
                    .any(|known| scheme.eq_ignore_ascii_case(known));
            }
            _ => {}
        }
    }
    false
}
/// Verdict of `classify_trailing` on the final character of a URL.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TrailingClass {
/// The URL is empty, or its last character is an ASCII letter or digit, a
/// `/`, or one of the caller-supplied safe trailing characters.
Safe,
/// The last character is anything else.
Ambiguous,
}
/// Classifies `url` by its final character.
///
/// * `url` — the URL text to inspect.
/// * `safe_trailing_chars` — extra characters the caller accepts as a URL's
///   last character.
///
/// Returns [`TrailingClass::Safe`] when `url` is empty or ends with an ASCII
/// alphanumeric character, a `/`, or a member of `safe_trailing_chars`;
/// returns [`TrailingClass::Ambiguous`] for any other final character.
pub(crate) fn classify_trailing(url: &str, safe_trailing_chars: &[char]) -> TrailingClass {
    match url.chars().next_back() {
        None => TrailingClass::Safe,
        Some(tail)
            if tail.is_ascii_alphanumeric()
                || tail == '/'
                || safe_trailing_chars.contains(&tail) =>
        {
            TrailingClass::Safe
        }
        Some(_) => TrailingClass::Ambiguous,
    }
}
// Unit tests are kept in a separate `tests` module file and are only compiled
// for test builds.
#[cfg(test)]
mod tests;