halldyll_core/render/
decision.rs1use scraper::{Html, Selector};
4
5#[derive(Debug, Clone, PartialEq, Eq)]
7pub enum RenderDecision {
8 Static,
10 NeedsBrowser(BrowserReason),
12}
13
/// Why a document was judged to need a browser render.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BrowserReason {
    /// The `<body>` text is shorter than the configured minimum content length.
    EmptyContent,
    /// The number of external scripts exceeds the configured threshold.
    HeavyScripts,
    /// A JavaScript framework marker was found; carries the framework's display name.
    JsFramework(String),
    /// NOTE(review): nothing in this file constructs this variant; it is
    /// presumably reserved for lazy-loaded content detection — confirm with callers.
    LazyLoading,
    /// Required selectors were configured and none of them matched the document.
    MissingSelectors,
    /// An application mount container exists but holds almost no text.
    SinglePageApp,
}
30
/// Configurable set of heuristics that decide whether a fetched HTML document
/// can be processed as-is or should be rendered in a browser first.
///
/// Construct it with `RenderChecker::new()` / `Default::default()` and adjust
/// it with the `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct RenderChecker {
    /// A document with more than this many `<script src>` elements is
    /// reported as script-heavy.
    script_threshold: usize,
    /// Minimum length, in bytes, of the whitespace-normalised `<body>` text;
    /// shorter documents are reported as having empty content.
    min_content_length: usize,
    /// CSS selectors of which at least one must match the document. An empty
    /// list disables the check.
    required_selectors: Vec<String>,
}
40
41impl Default for RenderChecker {
42 fn default() -> Self {
43 Self {
44 script_threshold: 10,
45 min_content_length: 500,
46 required_selectors: Vec::new(),
47 }
48 }
49}
50
51impl RenderChecker {
52 pub fn new() -> Self {
54 Self::default()
55 }
56
57 pub fn with_script_threshold(mut self, threshold: usize) -> Self {
59 self.script_threshold = threshold;
60 self
61 }
62
63 pub fn with_min_content(mut self, min_length: usize) -> Self {
65 self.min_content_length = min_length;
66 self
67 }
68
69 pub fn with_required_selectors(mut self, selectors: Vec<String>) -> Self {
71 self.required_selectors = selectors;
72 self
73 }
74
75 pub fn check(&self, html: &str) -> RenderDecision {
77 let document = Html::parse_document(html);
78
79 if let Some(reason) = self.check_empty_content(&document) {
81 return RenderDecision::NeedsBrowser(reason);
82 }
83
84 if let Some(reason) = self.check_js_frameworks(html) {
86 return RenderDecision::NeedsBrowser(reason);
87 }
88
89 if let Some(reason) = self.check_heavy_scripts(&document) {
91 return RenderDecision::NeedsBrowser(reason);
92 }
93
94 if let Some(reason) = self.check_required_selectors(&document) {
96 return RenderDecision::NeedsBrowser(reason);
97 }
98
99 if let Some(reason) = self.check_spa_patterns(&document) {
101 return RenderDecision::NeedsBrowser(reason);
102 }
103
104 RenderDecision::Static
105 }
106
107 fn check_empty_content(&self, document: &Html) -> Option<BrowserReason> {
109 let body_selector = Selector::parse("body").ok()?;
111 let body = document.select(&body_selector).next()?;
112
113 let text: String = body
115 .text()
116 .collect::<Vec<_>>()
117 .join(" ")
118 .split_whitespace()
119 .collect::<Vec<_>>()
120 .join(" ");
121
122 if text.len() < self.min_content_length {
123 return Some(BrowserReason::EmptyContent);
124 }
125
126 None
127 }
128
129 fn check_js_frameworks(&self, html: &str) -> Option<BrowserReason> {
131 let html_lower = html.to_lowercase();
132
133 if html_lower.contains("__react")
135 || html_lower.contains("data-reactroot")
136 || html_lower.contains("_reactrootcontainer")
137 {
138 return Some(BrowserReason::JsFramework("React".to_string()));
139 }
140
141 if html_lower.contains("data-v-") || html_lower.contains("__vue__") {
143 return Some(BrowserReason::JsFramework("Vue".to_string()));
144 }
145
146 if html_lower.contains("ng-version") || html_lower.contains("ng-app") {
148 return Some(BrowserReason::JsFramework("Angular".to_string()));
149 }
150
151 if html_lower.contains("__next") || html_lower.contains("_next/static") {
153 return Some(BrowserReason::JsFramework("Next.js".to_string()));
154 }
155
156 if html_lower.contains("__nuxt") || html_lower.contains("_nuxt/") {
158 return Some(BrowserReason::JsFramework("Nuxt.js".to_string()));
159 }
160
161 if html_lower.contains("svelte-") {
163 return Some(BrowserReason::JsFramework("Svelte".to_string()));
164 }
165
166 None
167 }
168
169 fn check_heavy_scripts(&self, document: &Html) -> Option<BrowserReason> {
171 let script_selector = Selector::parse("script[src]").ok()?;
172 let script_count = document.select(&script_selector).count();
173
174 if script_count > self.script_threshold {
175 return Some(BrowserReason::HeavyScripts);
176 }
177
178 None
179 }
180
181 fn check_required_selectors(&self, document: &Html) -> Option<BrowserReason> {
183 if self.required_selectors.is_empty() {
184 return None;
185 }
186
187 for selector_str in &self.required_selectors {
188 if let Ok(selector) = Selector::parse(selector_str) {
189 if document.select(&selector).next().is_some() {
190 return None; }
192 }
193 }
194
195 Some(BrowserReason::MissingSelectors)
197 }
198
199 fn check_spa_patterns(&self, document: &Html) -> Option<BrowserReason> {
201 let app_selector = Selector::parse("#app, #root, #__next, #__nuxt").ok()?;
203 let app_div = document.select(&app_selector).next()?;
204
205 let text: String = app_div.text().collect::<Vec<_>>().join("");
207 let text = text.trim();
208
209 if text.len() < 100 {
210 return Some(BrowserReason::SinglePageApp);
211 }
212
213 None
214 }
215}
216
/// Raw measurements taken from an HTML document that hint at how much of the
/// page depends on client-side rendering. All fields default to zero / `false`
/// / `None`.
#[derive(Clone, Debug, Default)]
pub struct RenderIndicators {
    /// Number of `<script>` elements that carry a `src` attribute.
    pub external_scripts: usize,
    /// Number of `<script>` elements without a `src` attribute.
    pub inline_scripts: usize,
    /// Whether any element carries a lazy-loading attribute.
    pub has_lazy_loading: bool,
    /// Display name of the JavaScript framework detected in the markup, if any.
    pub detected_framework: Option<String>,
    /// Length in bytes of the trimmed `<body>` text.
    pub text_content_length: usize,
    /// Whether an application mount container was found holding almost no text.
    pub empty_app_container: bool,
}
233
234pub fn analyze_render_indicators(html: &str) -> RenderIndicators {
236 let document = Html::parse_document(html);
237 let mut indicators = RenderIndicators::default();
238
239 if let Ok(sel) = Selector::parse("script[src]") {
241 indicators.external_scripts = document.select(&sel).count();
242 }
243 if let Ok(sel) = Selector::parse("script:not([src])") {
244 indicators.inline_scripts = document.select(&sel).count();
245 }
246
247 if let Ok(sel) = Selector::parse("[data-src], [data-lazy], [loading='lazy']") {
249 indicators.has_lazy_loading = document.select(&sel).next().is_some();
250 }
251
252 let checker = RenderChecker::new();
254 if let Some(BrowserReason::JsFramework(fw)) = checker.check_js_frameworks(html) {
255 indicators.detected_framework = Some(fw);
256 }
257
258 if let Ok(body_sel) = Selector::parse("body") {
260 if let Some(body) = document.select(&body_sel).next() {
261 indicators.text_content_length = body
262 .text()
263 .collect::<Vec<_>>()
264 .join("")
265 .trim()
266 .len();
267 }
268 }
269
270 if let Ok(sel) = Selector::parse("#app, #root") {
272 if let Some(container) = document.select(&sel).next() {
273 let text: String = container.text().collect();
274 indicators.empty_app_container = text.trim().len() < 50;
275 }
276 }
277
278 indicators
279}