use scraper::{Html, Selector};
use url::Url;
/// A single resource hint taken from one `<link>` element.
///
/// The field notes below describe the values as `extract_resource_hints`
/// fills them in; since every field is public, hints built elsewhere may not
/// follow the same normalisation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResourceHint {
    /// Which `rel` keyword this hint was produced for.
    pub kind: HintKind,
    /// The link's `href`, joined onto the base URL (only `http`/`https` results are kept).
    pub url: Url,
    /// The `as` attribute, trimmed and ASCII-lowercased; `None` when it is absent or blank.
    pub as_: Option<String>,
    /// The `crossorigin` attribute, trimmed and ASCII-lowercased.
    ///
    /// `None` when the attribute is absent; a present-but-empty attribute is
    /// kept as `Some("")`.
    pub crossorigin: Option<String>,
}
/// The `rel` keywords that are recognised as resource hints.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintKind {
    /// `rel="dns-prefetch"`
    DnsPrefetch,
    /// `rel="preconnect"`
    Preconnect,
    /// `rel="preload"`
    Preload,
    /// `rel="modulepreload"`
    ModulePreload,
}

impl HintKind {
    /// Returns the lowercase `rel` keyword that corresponds to this kind.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::DnsPrefetch => "dns-prefetch",
            Self::Preconnect => "preconnect",
            Self::Preload => "preload",
            Self::ModulePreload => "modulepreload",
        }
    }
}
/// A collection of resource hints; the `Default` value holds none.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ResourceHints {
    /// Every stored hint, of any kind, in insertion order.
    pub all: Vec<ResourceHint>,
}
impl ResourceHints {
    /// Iterates over the stored hints whose kind equals `kind`, in stored order.
    pub fn of(&self, kind: HintKind) -> impl Iterator<Item = &ResourceHint> {
        self.all.iter().filter(move |hint| kind == hint.kind)
    }

    /// Returns `true` when no hint of any kind is stored.
    pub fn is_empty(&self) -> bool {
        self.all.is_empty()
    }

    /// Returns the total number of stored hints across all kinds.
    pub fn len(&self) -> usize {
        self.all.len()
    }
}
pub fn extract_resource_hints(base: &Url, html: &str) -> ResourceHints {
let doc = Html::parse_document(html);
let mut out = Vec::new();
let sel = match Selector::parse("link[rel][href]") {
Ok(s) => s,
Err(_) => return ResourceHints::default(),
};
for el in doc.select(&sel) {
let rel = el.value().attr("rel").unwrap_or("").to_ascii_lowercase();
let href = match el.value().attr("href") {
Some(h) if !h.is_empty() => h,
_ => continue,
};
let resolved = match base.join(href) {
Ok(u) => u,
Err(_) => continue,
};
if !matches!(resolved.scheme(), "http" | "https") {
continue;
}
let as_ = el
.value()
.attr("as")
.map(|s| s.trim().to_ascii_lowercase())
.filter(|s| !s.is_empty());
let crossorigin = el
.value()
.attr("crossorigin")
.map(|s| s.trim().to_ascii_lowercase());
for token in rel.split_ascii_whitespace() {
let kind = match token {
"dns-prefetch" => HintKind::DnsPrefetch,
"preconnect" => HintKind::Preconnect,
"preload" => HintKind::Preload,
"modulepreload" => HintKind::ModulePreload,
_ => continue,
};
out.push(ResourceHint {
kind,
url: resolved.clone(),
as_: as_.clone(),
crossorigin: crossorigin.clone(),
});
}
}
ResourceHints { all: out }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Page URL that every test resolves `href` values against.
    fn base() -> Url {
        Url::parse("https://example.test/page").unwrap()
    }

    /// Gathers the hints of one kind so a test can count and index them.
    fn hints_of(hints: &ResourceHints, kind: HintKind) -> Vec<&ResourceHint> {
        hints.of(kind).collect()
    }

    /// A `dns-prefetch` link and a `preconnect` link each produce one hint,
    /// and a bare `crossorigin` attribute is reported as an empty string.
    #[test]
    fn extracts_dns_prefetch_and_preconnect() {
        let html = r#"
<!doctype html><html><head>
<link rel="dns-prefetch" href="//cdn.example.com">
<link rel="preconnect" href="https://api.example.com" crossorigin>
</head><body></body></html>
"#;
        let found = extract_resource_hints(&base(), html);
        assert_eq!(found.len(), 2);

        let dns = hints_of(&found, HintKind::DnsPrefetch);
        assert_eq!(dns.len(), 1);
        assert_eq!(dns[0].url.host_str(), Some("cdn.example.com"));

        let preconnect = hints_of(&found, HintKind::Preconnect);
        assert_eq!(preconnect.len(), 1);
        assert_eq!(preconnect[0].url.host_str(), Some("api.example.com"));
        assert_eq!(preconnect[0].crossorigin.as_deref(), Some(""));
    }

    /// `preload` carries its `as` value, and both preload flavours resolve
    /// root-relative hrefs against the base URL.
    #[test]
    fn preload_with_as_attribute() {
        let html = r#"
<html><head>
<link rel="preload" href="/main.js" as="script">
<link rel="modulepreload" href="/m.mjs">
</head></html>
"#;
        let found = extract_resource_hints(&base(), html);
        assert_eq!(found.len(), 2);

        let preload = hints_of(&found, HintKind::Preload);
        assert_eq!(preload.len(), 1);
        assert_eq!(preload[0].as_.as_deref(), Some("script"));
        assert_eq!(preload[0].url.as_str(), "https://example.test/main.js");

        let module_preload = hints_of(&found, HintKind::ModulePreload);
        assert_eq!(module_preload.len(), 1);
        assert_eq!(module_preload[0].url.as_str(), "https://example.test/m.mjs");
    }

    /// A `rel` value listing two recognised keywords produces one hint per keyword.
    #[test]
    fn space_separated_rel_tokens_emit_multiple_hints() {
        let html = r#"
<html><head>
<link rel="preload modulepreload" href="/dual.mjs" as="script">
</head></html>
"#;
        let found = extract_resource_hints(&base(), html);
        assert_eq!(found.len(), 2);
        assert!(found.all.iter().any(|h| h.kind == HintKind::Preload));
        assert!(found.all.iter().any(|h| h.kind == HintKind::ModulePreload));
    }

    /// Unrecognised `rel` keywords, non-http(s) targets and empty hrefs yield nothing.
    #[test]
    fn ignores_non_http_and_unknown_rel() {
        let html = r#"
<html><head>
<link rel="icon" href="/favicon.ico">
<link rel="stylesheet" href="/a.css">
<link rel="preload" href="javascript:alert(1)">
<link rel="preload" href="data:text/plain,abc">
<link rel="preconnect" href="">
</head></html>
"#;
        let found = extract_resource_hints(&base(), html);
        assert!(found.is_empty(), "unexpected hints: {:?}", found.all);
    }

    /// Protocol-relative hrefs take the base scheme; root-relative hrefs take
    /// the base origin.
    #[test]
    fn relative_and_protocol_relative_href_resolve() {
        let html = r#"
<html><head>
<link rel="dns-prefetch" href="//cdn.a.test/">
<link rel="preconnect" href="/api">
</head></html>
"#;
        let found = extract_resource_hints(&base(), html);
        assert_eq!(found.len(), 2);

        let cdn = &found.all[0];
        assert_eq!(cdn.url.scheme(), "https");
        assert_eq!(cdn.url.host_str(), Some("cdn.a.test"));

        let api = &found.all[1];
        assert_eq!(api.url.as_str(), "https://example.test/api");
    }
}