use once_cell::sync::Lazy;
use regex::Regex;
use scraper::{Html, Selector};
use serde::Serialize;
/// Top-level bundle of performance-related signals for one HTML document.
///
/// Built by [`extract`] and consumed by [`suggestions`]; serialised via `serde`.
#[derive(Serialize)]
pub struct Performance {
/// Signals derived from `<img>` and `<picture>` markup.
pub images: ImageSignals,
/// Counts of scripts and stylesheets found in `<head>`.
pub render_blocking: RenderBlocking,
/// Counts of `<link rel=…>` resource hints.
pub resource_hints: ResourceHints,
/// Signals about web-font links and `font-display` usage.
pub fonts: FontSignals,
/// Byte totals of inline `<style>` and `<script>` content.
pub inline_bytes: InlineBundleBytes,
}
/// Image-related counters produced by `image_signals`.
#[derive(Serialize)]
pub struct ImageSignals {
/// Number of `<img>` elements in the document.
pub total: usize,
/// `<img>` elements past the first `FOLD_BUFFER` (in document order) whose
/// `loading` attribute is not `lazy`.
pub eligible_for_lazy_missing: usize,
/// `<img>` elements whose `width` or `height` attribute is absent or blank.
pub missing_dimensions: usize,
/// `<img>` elements whose `src` matches `NONDESCRIPTIVE_FILENAME`.
pub non_descriptive_filenames: usize,
/// `<source>` children of `<picture>` that `image_signals` counts as offering
/// a modern image format (see that function for the exact rule).
pub modern_format_via_picture: usize,
/// `<img>` elements whose `src` matches `MODERN_FORMAT` (`.webp`, `.avif`, `.jxl`).
pub modern_format_via_src: usize,
}
/// Counters for resources in `<head>`, produced by `render_blocking`.
#[derive(Serialize)]
pub struct RenderBlocking {
/// `<script src>` elements in `<head>` that have neither `async` nor `defer`
/// and are not `type="module"`.
pub head_scripts_blocking: usize,
/// `<link rel="stylesheet">` elements in `<head>`.
pub head_stylesheets: usize,
}
/// Counters for `<link rel=…>` resource hints, produced by `resource_hints`.
#[derive(Serialize)]
pub struct ResourceHints {
/// Links whose `rel` contains `preload` but not `modulepreload`.
pub preload: usize,
/// Links whose `rel` contains `preconnect`.
pub preconnect: usize,
/// Links whose `rel` contains `dns-prefetch`.
pub dns_prefetch: usize,
/// Links whose `rel` contains `modulepreload`.
pub modulepreload: usize,
/// `true` when at least one preload link has `as="image"` (case-insensitive).
pub preloads_an_image: bool,
}
/// Web-font signals produced by `font_signals`.
#[derive(Serialize)]
pub struct FontSignals {
/// `<link rel>` elements that either preload a font (`as="font"`) or whose
/// `href` is recognised by `is_font_host`.
pub external_link_count: usize,
/// Whether `font_signals` detected a `font-display` strategy of `swap`,
/// `optional` or `fallback` for the page.
pub has_font_display_strategy: bool,
/// `<link rel>` elements whose `rel` contains `preload` and whose `as` is `font`.
pub preloaded: usize,
}
/// Byte totals of inline code, produced by `inline_bytes`.
///
/// Sizes are UTF-8 byte lengths of the elements' text nodes (`str::len`).
#[derive(Serialize)]
pub struct InlineBundleBytes {
/// Total bytes of text inside `<style>` elements.
pub inline_css: usize,
/// Total bytes of text inside the inline (`src`-less) `<script>` elements
/// that `inline_bytes` measures.
pub inline_js: usize,
}
/// Matches image URLs whose file name looks camera- or tool-generated rather
/// than descriptive: `IMG_1234.jpg`, `DSC00012.png`, `image-3.webp`,
/// `photo1.gif`, `picture_7.jpeg`, `screenshot.png`, `Screen Shot-2.png`.
///
/// The match is case-insensitive. The name must start at the beginning of the
/// string or directly after a `/`. The extension may be followed by a query
/// string or fragment (`?w=800`, `#x`), so cache-busted or resized URLs are
/// still recognised, in line with how `MODERN_FORMAT` treats URL suffixes.
static NONDESCRIPTIVE_FILENAME: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r"(?i)(^|/)(img[_-]?\d+|dsc[_-]?\d+|image[_-]?\d+|photo[_-]?\d+|picture[_-]?\d+|screen ?shot|screenshot)([_-]?\d+)?\.(jpe?g|png|gif|webp|avif|heic)(\?|#|$)",
    )
    .expect("NONDESCRIPTIVE_FILENAME must be a valid regex")
});
/// Case-insensitive match for a `.webp`, `.avif` or `.jxl` extension that is
/// immediately followed by `?`, `#`, or the end of the string.
static MODERN_FORMAT: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\.(webp|avif|jxl)(\?|#|$)").unwrap());
/// Case-insensitive match for a CSS `font-display` declaration whose value is
/// `swap`, `optional` or `fallback` (optional whitespace around the colon).
/// Other values such as `auto` or `block` do not match.
static FONT_DISPLAY: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)font-display\s*:\s*(swap|optional|fallback)").unwrap());
/// Number of leading `<img>` elements (in document order) that `image_signals`
/// exempts from the missing-`loading="lazy"` check.
const FOLD_BUFFER: usize = 2;
/// Runs every performance extractor over `doc` and bundles the results.
///
/// `doc` is the parsed document that all extractors query. `raw_html` is the
/// unparsed markup, which is passed only to the font extractor.
pub fn extract(doc: &Html, raw_html: &str) -> Performance {
    let images = image_signals(doc);
    let blocking = render_blocking(doc);
    let hints = resource_hints(doc);
    let fonts = font_signals(doc, raw_html);
    let inline = inline_bytes(doc);

    Performance {
        images,
        render_blocking: blocking,
        resource_hints: hints,
        fonts,
        inline_bytes: inline,
    }
}
/// Returns `true` when a `<source type>` value names a modern image format
/// (WebP, AVIF or JPEG XL). Any MIME parameters after `;` are ignored and the
/// comparison is ASCII case-insensitive.
fn is_modern_image_mime(mime: &str) -> bool {
    let essence = mime.split(';').next().unwrap_or("").trim();
    ["image/webp", "image/avif", "image/jxl"]
        .iter()
        .any(|modern| essence.eq_ignore_ascii_case(modern))
}

/// Collects image-related signals from every `<img>` and `<picture>` in `doc`.
///
/// For each `<img>`, in document order:
/// * images past the first `FOLD_BUFFER` without `loading="lazy"` count as
///   lazy-load candidates;
/// * a missing or blank `width` or `height` counts as missing dimensions;
/// * `src` is tested against `NONDESCRIPTIVE_FILENAME` and `MODERN_FORMAT`.
///
/// `modern_format_via_picture` counts `<picture> > <source>` elements whose
/// `type` is WebP, AVIF or JPEG XL. Sources with legacy types such as
/// `image/jpeg` or `image/png` are not counted, so they cannot hide the
/// absence of a modern format.
fn image_signals(doc: &Html) -> ImageSignals {
    let img_sel = Selector::parse("img").expect("`img` is a valid selector");
    let source_sel = Selector::parse("picture > source[type]")
        .expect("`picture > source[type]` is a valid selector");

    let mut total = 0;
    let mut eligible_missing = 0;
    let mut missing_dims = 0;
    let mut non_descriptive = 0;
    let mut modern_src = 0;

    for (idx, el) in doc.select(&img_sel).enumerate() {
        let img = el.value();
        total += 1;

        let is_lazy = img
            .attr("loading")
            .is_some_and(|v| v.eq_ignore_ascii_case("lazy"));
        if idx >= FOLD_BUFFER && !is_lazy {
            eligible_missing += 1;
        }

        // A dimension counts only when the attribute is present and non-blank.
        let has_dimension =
            |name: &str| img.attr(name).is_some_and(|v| !v.trim().is_empty());
        if !has_dimension("width") || !has_dimension("height") {
            missing_dims += 1;
        }

        let src = img.attr("src").unwrap_or("");
        if NONDESCRIPTIVE_FILENAME.is_match(src) {
            non_descriptive += 1;
        }
        if MODERN_FORMAT.is_match(src) {
            modern_src += 1;
        }
    }

    let modern_picture = doc
        .select(&source_sel)
        .filter(|source| source.value().attr("type").is_some_and(is_modern_image_mime))
        .count();

    ImageSignals {
        total,
        eligible_for_lazy_missing: eligible_missing,
        missing_dimensions: missing_dims,
        non_descriptive_filenames: non_descriptive,
        modern_format_via_picture: modern_picture,
        modern_format_via_src: modern_src,
    }
}
/// Counts potentially render-blocking resources inside the first `<head>`.
///
/// A `<script src>` is counted as blocking unless it has `async`, `defer`, or
/// `type="module"` (case-insensitive). Every `<link rel="stylesheet">` in the
/// head is counted. If no `<head>` element is found, both counts are zero.
fn render_blocking(doc: &Html) -> RenderBlocking {
    let head_selector = Selector::parse("head").unwrap();
    let Some(head) = doc.select(&head_selector).next() else {
        return RenderBlocking {
            head_scripts_blocking: 0,
            head_stylesheets: 0,
        };
    };

    let external_scripts = Selector::parse("script[src]").unwrap();
    let head_scripts_blocking = head
        .select(&external_scripts)
        .filter(|script| {
            let attrs = script.value();
            let non_blocking_attr =
                attrs.attr("async").is_some() || attrs.attr("defer").is_some();
            let module_script = attrs
                .attr("type")
                .is_some_and(|t| t.eq_ignore_ascii_case("module"));
            !(non_blocking_attr || module_script)
        })
        .count();

    let stylesheet_links = Selector::parse("link[rel=\"stylesheet\"]").unwrap();
    let head_stylesheets = head.select(&stylesheet_links).count();

    RenderBlocking {
        head_scripts_blocking,
        head_stylesheets,
    }
}
/// Tallies `<link rel=…>` resource hints across the whole document.
///
/// The lower-cased `rel` value is checked by substring. `modulepreload` and
/// plain `preload` are mutually exclusive, because a `rel` containing
/// `modulepreload` is never counted as `preload`. `preloads_an_image` becomes
/// `true` once any plain preload link has `as="image"` (case-insensitive).
fn resource_hints(doc: &Html) -> ResourceHints {
    let rel_links = Selector::parse("link[rel]").unwrap();
    let mut hints = ResourceHints {
        preload: 0,
        preconnect: 0,
        dns_prefetch: 0,
        modulepreload: 0,
        preloads_an_image: false,
    };

    for link in doc.select(&rel_links) {
        let attrs = link.value();
        let rel = attrs.attr("rel").unwrap_or("").to_ascii_lowercase();

        // "modulepreload" contains "preload", so test the longer token first.
        if rel.contains("modulepreload") {
            hints.modulepreload += 1;
        } else if rel.contains("preload") {
            hints.preload += 1;
            let targets_image = attrs
                .attr("as")
                .is_some_and(|a| a.eq_ignore_ascii_case("image"));
            if targets_image {
                hints.preloads_an_image = true;
            }
        }

        hints.preconnect += usize::from(rel.contains("preconnect"));
        hints.dns_prefetch += usize::from(rel.contains("dns-prefetch"));
    }

    hints
}
/// Returns `true` when `href` is a Google Fonts CSS URL whose query string
/// requests `display=swap`, `display=optional` or `display=fallback`.
///
/// The stylesheet Google serves for such a URL already carries the matching
/// `font-display` descriptor, even though the page's own markup never
/// contains the text `font-display:`.
fn requests_font_display(href: &str) -> bool {
    let href = href.to_ascii_lowercase();
    if !href.contains("fonts.googleapis.com") {
        return false;
    }
    let Some((_, query)) = href.split_once('?') else {
        return false;
    };
    // Drop a trailing fragment so it is not read as part of the last value.
    let query = query.split('#').next().unwrap_or("");
    query
        .split('&')
        .filter_map(|pair| pair.split_once('='))
        .any(|(key, value)| key == "display" && matches!(value, "swap" | "optional" | "fallback"))
}

/// Collects web-font signals from `<link rel>` elements and the raw markup.
///
/// * `external_link_count`: links that preload a font (`rel` contains
///   `preload` and `as="font"`) or whose `href` is recognised by
///   `is_font_host`.
/// * `preloaded`: the font-preload links alone.
/// * `has_font_display_strategy`: `true` when `raw_html` contains a
///   `font-display: swap|optional|fallback` declaration, or when a Google
///   Fonts link requests it through the `display=` URL parameter.
fn font_signals(doc: &Html, raw_html: &str) -> FontSignals {
    let link_sel = Selector::parse("link[rel]").expect("`link[rel]` is a valid selector");

    let mut external = 0;
    let mut preloaded = 0;
    let mut display_via_url = false;

    for el in doc.select(&link_sel) {
        let link = el.value();
        let rel = link.attr("rel").unwrap_or("").to_ascii_lowercase();
        let href = link.attr("href").unwrap_or("");

        let is_font_preload = rel.contains("preload")
            && link
                .attr("as")
                .is_some_and(|a| a.eq_ignore_ascii_case("font"));

        if is_font_preload {
            preloaded += 1;
        }
        if is_font_preload || is_font_host(href) {
            external += 1;
        }
        if !display_via_url && requests_font_display(href) {
            display_via_url = true;
        }
    }

    FontSignals {
        external_link_count: external,
        has_font_display_strategy: display_via_url || FONT_DISPLAY.is_match(raw_html),
        preloaded,
    }
}
/// Reports whether `href` points at a known web-font service or a WOFF/WOFF2 file.
///
/// The check is ASCII case-insensitive. A URL qualifies when it contains one
/// of the known font-service host names anywhere in the string, or when its
/// path ends in `.woff` or `.woff2`. The path is everything before the first
/// `?` or `#`, so `font.woff?v=3` and `font.woff2#iefix` are recognised too.
fn is_font_host(href: &str) -> bool {
    // `typekit.net` also covers `use.typekit.net` and `p.typekit.net`.
    const FONT_HOSTS: [&str; 4] = [
        "fonts.googleapis.com",
        "fonts.gstatic.com",
        "typekit.net",
        "use.fontawesome.com",
    ];

    let h = href.to_ascii_lowercase();
    if FONT_HOSTS.iter().any(|host| h.contains(host)) {
        return true;
    }

    // Strip the query string and fragment before looking at the extension.
    let path = h.split(['?', '#']).next().unwrap_or("");
    path.ends_with(".woff") || path.ends_with(".woff2")
}
/// Decides whether an inline `<script>` with the given `type` attribute is
/// executed as JavaScript.
///
/// A missing or empty `type`, `module`, and the JavaScript MIME types are
/// executable. Anything else (`application/ld+json`, `application/json`,
/// `text/template`, `importmap`, …) is a data block the browser does not run.
/// The value is trimmed and compared ASCII case-insensitively.
/// NOTE(review): the legacy `language` attribute is not consulted.
fn is_executable_script_type(type_attr: Option<&str>) -> bool {
    let Some(raw) = type_attr else {
        return true;
    };
    let ty = raw.trim().to_ascii_lowercase();
    ty.is_empty()
        || matches!(
            ty.as_str(),
            "module"
                | "application/ecmascript"
                | "application/javascript"
                | "application/x-ecmascript"
                | "application/x-javascript"
                | "text/ecmascript"
                | "text/javascript"
                | "text/javascript1.0"
                | "text/javascript1.1"
                | "text/javascript1.2"
                | "text/javascript1.3"
                | "text/javascript1.4"
                | "text/javascript1.5"
                | "text/jscript"
                | "text/livescript"
                | "text/x-ecmascript"
                | "text/x-javascript"
        )
}

/// Measures how many bytes of CSS and JavaScript are inlined in `doc`.
///
/// `inline_css` sums the UTF-8 byte length of the text in every `<style>`.
/// `inline_js` does the same for `<script>` elements without `src`, counting
/// only scripts the browser would execute. Data blocks such as JSON-LD are
/// skipped so they do not inflate the inline-JS total.
fn inline_bytes(doc: &Html) -> InlineBundleBytes {
    let style_sel = Selector::parse("style").expect("`style` is a valid selector");
    let script_sel = Selector::parse("script:not([src])")
        .expect("`script:not([src])` is a valid selector");

    let css: usize = doc
        .select(&style_sel)
        .map(|el| el.text().map(str::len).sum::<usize>())
        .sum();

    let js: usize = doc
        .select(&script_sel)
        .filter(|el| is_executable_script_type(el.value().attr("type")))
        .map(|el| el.text().map(str::len).sum::<usize>())
        .sum();

    InlineBundleBytes {
        inline_css: css,
        inline_js: js,
    }
}
/// Turns extracted performance signals into human-readable recommendations.
///
/// Each rule inspects one or two counters in `p` and, when its threshold is
/// crossed, appends one message. Messages are returned in a fixed order:
/// images, render-blocking resources, fonts, LCP preload, inline bundle sizes.
/// The result is empty when no rule fires.
///
/// Inline sizes are reported in decimal kilobytes (bytes / 1000) so that the
/// printed number agrees with the 50 000-byte threshold quoted in the message.
pub fn suggestions(p: &Performance) -> Vec<String> {
    /// Inline CSS/JS above this many bytes triggers a warning.
    const INLINE_WARN_BYTES: usize = 50_000;

    let plural = |n: usize| if n == 1 { "" } else { "s" };
    let images = &p.images;
    let mut out = Vec::new();

    if images.eligible_for_lazy_missing > 0 {
        out.push(format!(
            "{} below-the-fold image{} without `loading=\"lazy\"`. Lazy-load every image past the first two to cut initial bytes.",
            images.eligible_for_lazy_missing,
            plural(images.eligible_for_lazy_missing),
        ));
    }

    if images.missing_dimensions > 0 && images.total > 0 {
        out.push(format!(
            "{} of {} `<img>` missing width/height. Each contributes to Cumulative Layout Shift on first paint.",
            images.missing_dimensions, images.total,
        ));
    }

    if images.non_descriptive_filenames > 0 {
        let n = images.non_descriptive_filenames;
        out.push(format!(
            "{} image filename{} {} templated (IMG_1234, DSC00012, screenshot.png). Rename — image search and multimodal AI read the filename.",
            n,
            plural(n),
            // Keep subject/verb agreement: "1 filename looks", "2 filenames look".
            if n == 1 { "looks" } else { "look" },
        ));
    }

    if images.total >= 4
        && images.modern_format_via_picture == 0
        && images.modern_format_via_src == 0
    {
        out.push(
            "No WebP / AVIF / JXL detected. Modern formats cut 25–50% off image bytes versus JPEG/PNG.".into(),
        );
    }

    if p.render_blocking.head_scripts_blocking > 0 {
        out.push(format!(
            "{} render-blocking `<script>` in `<head>` (no async/defer/type=module). Each one delays first paint.",
            p.render_blocking.head_scripts_blocking,
        ));
    }

    if p.render_blocking.head_stylesheets > 4 {
        out.push(format!(
            "{} `<link rel=\"stylesheet\">` in `<head>`. Critical CSS should be inline; ship the rest async.",
            p.render_blocking.head_stylesheets,
        ));
    }

    if p.fonts.external_link_count > 0 && !p.fonts.has_font_display_strategy {
        out.push(
            "External fonts present but no `font-display: swap|optional|fallback` declared. Invisible text during font load (FOIT) on slow networks.".into(),
        );
    }

    if images.total > 0 && !p.resource_hints.preloads_an_image {
        out.push(
            "No `<link rel=\"preload\" as=\"image\">` for the LCP image. Browser discovers the hero image only after the HTML parser reaches it.".into(),
        );
    }

    if p.inline_bytes.inline_css > INLINE_WARN_BYTES {
        out.push(format!(
            "{} KB of inline `<style>`. Past ~50KB the critical-CSS shortcut becomes its own performance problem.",
            p.inline_bytes.inline_css / 1000,
        ));
    }

    if p.inline_bytes.inline_js > INLINE_WARN_BYTES {
        out.push(format!(
            "{} KB of inline `<script>`. Bundle and defer — inline JS blocks parsing.",
            p.inline_bytes.inline_js / 1000,
        ));
    }

    out
}