use std::sync::LazyLock;
use regex::Regex;
use url::Url;
/// Matches a double-quoted `src` or `href` attribute, case-insensitively.
///
/// Capture group 1 is the attribute name together with the single whitespace
/// character that precedes it; capture group 2 is the raw text between the
/// double quotes. Single-quoted and unquoted attribute values are not matched.
static URL_ATTR: LazyLock<Regex> = LazyLock::new(|| {
    // The pattern is a compile-time constant, so a failure here is a
    // programming error rather than a runtime condition.
    Regex::new(r#"(?i)(\s(?:src|href))\s*=\s*"([^"]*)""#)
        .expect("URL_ATTR pattern is a hard-coded, valid regex")
});
pub fn absolutize_urls(html: &str, base: &Url) -> String {
URL_ATTR
.replace_all(html, |caps: ®ex::Captures| {
let attr = &caps[1];
let resolved = resolve(&caps[2], base);
format!(r#"{attr}="{resolved}""#)
})
.to_string()
}
/// Resolves a single attribute value against `base`.
///
/// The value is returned exactly as given (including any surrounding
/// whitespace) when, after trimming, it is empty, starts with one of the
/// pass-through prefixes below, already parses as an absolute URL, or cannot
/// be joined onto `base`. Otherwise the joined absolute URL is returned in
/// its serialized form.
fn resolve(value: &str, base: &Url) -> String {
    // Fragment-only references and schemes that must never be rewritten.
    const PASSTHROUGH_PREFIXES: [&str; 5] = ["#", "data:", "mailto:", "tel:", "javascript:"];

    let candidate = value.trim();

    // Evaluated left to right with short-circuiting, so the URL parse is only
    // attempted when the cheaper prefix checks did not already decide.
    let keep_as_is = candidate.is_empty()
        || PASSTHROUGH_PREFIXES
            .iter()
            .any(|prefix| candidate.starts_with(prefix))
        || Url::parse(candidate).is_ok();
    if keep_as_is {
        return value.to_string();
    }

    base.join(candidate)
        .map(|joined| joined.to_string())
        .unwrap_or_else(|_| value.to_string())
}
/// Converts `html` to Markdown using `htmd::convert`.
///
/// If the conversion reports an error, the error is discarded and the input
/// HTML is returned unchanged as an owned `String`.
pub fn to_markdown(html: &str) -> String {
    match htmd::convert(html) {
        Ok(markdown) => markdown,
        // Best-effort: fall back to the raw input rather than failing.
        Err(_) => html.to_string(),
    }
}