use scraper::{Html, Selector};
/// Verdict produced by `RenderChecker::check` for a fetched HTML document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RenderDecision {
/// None of the checker's heuristics fired; the HTML can be used as fetched.
Static,
/// A heuristic fired; the payload names the first one that did.
NeedsBrowser(BrowserReason),
}
/// Why a document was judged to need a JavaScript-capable browser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BrowserReason {
/// The `<body>` text is shorter than the checker's configured minimum length.
EmptyContent,
/// The number of `script[src]` elements exceeds the checker's script threshold.
HeavyScripts,
/// A framework marker string was found in the raw HTML. The payload is the
/// framework's display name ("React", "Vue", "Angular", "Next.js", "Nuxt.js"
/// or "Svelte").
JsFramework(String),
/// NOTE(review): no code visible in this file constructs this variant;
/// lazy loading is only reported through `RenderIndicators::has_lazy_loading`.
LazyLoading,
/// Required selectors were configured and none of them matched the document.
MissingSelectors,
/// An `#app`, `#root`, `#__next` or `#__nuxt` container exists but holds
/// almost no text.
SinglePageApp,
}
/// Heuristic checker that decides whether fetched HTML can be used as-is or
/// has to be rendered in a JavaScript-capable browser first.
///
/// Configure it with the `with_*` builder methods and run it with `check`.
#[derive(Debug, Clone)]
pub struct RenderChecker {
    /// A document with more than this many `script[src]` elements is treated
    /// as script-heavy.
    script_threshold: usize,
    /// Minimum length, in bytes, of the body text for the document to count
    /// as having content.
    min_content_length: usize,
    /// CSS selectors probed against the document. When the list is non-empty
    /// and none of the selectors matches, the document is reported as
    /// missing its required content. An empty list disables the check.
    required_selectors: Vec<String>,
}
impl Default for RenderChecker {
    /// Builds the stock configuration: a script threshold of 10, a minimum
    /// content length of 500 and no required selectors.
    fn default() -> Self {
        let script_threshold = 10;
        let min_content_length = 500;
        let required_selectors = Vec::new();

        Self {
            script_threshold,
            min_content_length,
            required_selectors,
        }
    }
}
impl RenderChecker {
pub fn new() -> Self {
Self::default()
}
pub fn with_script_threshold(mut self, threshold: usize) -> Self {
self.script_threshold = threshold;
self
}
pub fn with_min_content(mut self, min_length: usize) -> Self {
self.min_content_length = min_length;
self
}
pub fn with_required_selectors(mut self, selectors: Vec<String>) -> Self {
self.required_selectors = selectors;
self
}
pub fn check(&self, html: &str) -> RenderDecision {
let document = Html::parse_document(html);
if let Some(reason) = self.check_empty_content(&document) {
return RenderDecision::NeedsBrowser(reason);
}
if let Some(reason) = self.check_js_frameworks(html) {
return RenderDecision::NeedsBrowser(reason);
}
if let Some(reason) = self.check_heavy_scripts(&document) {
return RenderDecision::NeedsBrowser(reason);
}
if let Some(reason) = self.check_required_selectors(&document) {
return RenderDecision::NeedsBrowser(reason);
}
if let Some(reason) = self.check_spa_patterns(&document) {
return RenderDecision::NeedsBrowser(reason);
}
RenderDecision::Static
}
fn check_empty_content(&self, document: &Html) -> Option<BrowserReason> {
let body_selector = Selector::parse("body").ok()?;
let body = document.select(&body_selector).next()?;
let text: String = body
.text()
.collect::<Vec<_>>()
.join(" ")
.split_whitespace()
.collect::<Vec<_>>()
.join(" ");
if text.len() < self.min_content_length {
return Some(BrowserReason::EmptyContent);
}
None
}
fn check_js_frameworks(&self, html: &str) -> Option<BrowserReason> {
let html_lower = html.to_lowercase();
if html_lower.contains("__react")
|| html_lower.contains("data-reactroot")
|| html_lower.contains("_reactrootcontainer")
{
return Some(BrowserReason::JsFramework("React".to_string()));
}
if html_lower.contains("data-v-") || html_lower.contains("__vue__") {
return Some(BrowserReason::JsFramework("Vue".to_string()));
}
if html_lower.contains("ng-version") || html_lower.contains("ng-app") {
return Some(BrowserReason::JsFramework("Angular".to_string()));
}
if html_lower.contains("__next") || html_lower.contains("_next/static") {
return Some(BrowserReason::JsFramework("Next.js".to_string()));
}
if html_lower.contains("__nuxt") || html_lower.contains("_nuxt/") {
return Some(BrowserReason::JsFramework("Nuxt.js".to_string()));
}
if html_lower.contains("svelte-") {
return Some(BrowserReason::JsFramework("Svelte".to_string()));
}
None
}
fn check_heavy_scripts(&self, document: &Html) -> Option<BrowserReason> {
let script_selector = Selector::parse("script[src]").ok()?;
let script_count = document.select(&script_selector).count();
if script_count > self.script_threshold {
return Some(BrowserReason::HeavyScripts);
}
None
}
fn check_required_selectors(&self, document: &Html) -> Option<BrowserReason> {
if self.required_selectors.is_empty() {
return None;
}
for selector_str in &self.required_selectors {
if let Ok(selector) = Selector::parse(selector_str) {
if document.select(&selector).next().is_some() {
return None; }
}
}
Some(BrowserReason::MissingSelectors)
}
fn check_spa_patterns(&self, document: &Html) -> Option<BrowserReason> {
let app_selector = Selector::parse("#app, #root, #__next, #__nuxt").ok()?;
let app_div = document.select(&app_selector).next()?;
let text: String = app_div.text().collect::<Vec<_>>().join("");
let text = text.trim();
if text.len() < 100 {
return Some(BrowserReason::SinglePageApp);
}
None
}
}
/// Raw measurements gathered by `analyze_render_indicators`.
///
/// Every field keeps its `Default` value (0 / `false` / `None`) when the
/// corresponding probe finds nothing.
#[derive(Debug, Clone, Default)]
pub struct RenderIndicators {
/// Number of elements matching `script[src]`.
pub external_scripts: usize,
/// Number of elements matching `script:not([src])`.
pub inline_scripts: usize,
/// Whether any element matches `[data-src], [data-lazy], [loading='lazy']`.
pub has_lazy_loading: bool,
/// Display name of the first JavaScript framework whose marker string was
/// found in the HTML, if any.
pub detected_framework: Option<String>,
/// Byte length of the concatenated `<body>` text after trimming.
pub text_content_length: usize,
/// `true` when the first `#app` / `#root` element holds fewer than 50 bytes
/// of trimmed text; `false` when it holds more or no such element exists.
pub empty_app_container: bool,
}
pub fn analyze_render_indicators(html: &str) -> RenderIndicators {
let document = Html::parse_document(html);
let mut indicators = RenderIndicators::default();
if let Ok(sel) = Selector::parse("script[src]") {
indicators.external_scripts = document.select(&sel).count();
}
if let Ok(sel) = Selector::parse("script:not([src])") {
indicators.inline_scripts = document.select(&sel).count();
}
if let Ok(sel) = Selector::parse("[data-src], [data-lazy], [loading='lazy']") {
indicators.has_lazy_loading = document.select(&sel).next().is_some();
}
let checker = RenderChecker::new();
if let Some(BrowserReason::JsFramework(fw)) = checker.check_js_frameworks(html) {
indicators.detected_framework = Some(fw);
}
if let Ok(body_sel) = Selector::parse("body") {
if let Some(body) = document.select(&body_sel).next() {
indicators.text_content_length = body
.text()
.collect::<Vec<_>>()
.join("")
.trim()
.len();
}
}
if let Ok(sel) = Selector::parse("#app, #root") {
if let Some(container) = document.select(&sel).next() {
let text: String = container.text().collect();
indicators.empty_app_container = text.trim().len() < 50;
}
}
indicators
}