1use super::{Error, HeadlessChromeSnafu};
16use dashmap::DashMap;
17use headless_chrome::Browser;
18use headless_chrome::Tab;
19use headless_chrome::protocol::cdp::Network;
20use headless_chrome::protocol::cdp::Network::ResourceTiming;
21use headless_chrome::protocol::cdp::Page;
22use headless_chrome::protocol::cdp::Target::CreateTarget;
23use headless_chrome::protocol::cdp::types::Event;
24use headless_chrome::util::Wait;
25use palette::{IntoColor, Luv, Srgb};
26use scopeguard::defer;
27use snafu::ResultExt;
28use std::sync::Arc;
29use std::sync::Mutex;
30use std::sync::atomic::{AtomicBool, Ordering};
31use std::time::Duration;
32
/// Module-local shorthand for `std::result::Result` whose error type is the
/// `Error` imported from the parent module.
type Result<T> = std::result::Result<T, Error>;
34
35fn luv_to_byte(luv: &Luv) -> u8 {
47 let l = luv.l;
49 let u = luv.u;
50 let v = luv.v;
51
52 if l.is_nan() || u.is_nan() || v.is_nan() {
54 return 0;
55 }
56
57 let l_clamped = l.clamp(0.0, 100.0);
59
60 let u_clamped = u.clamp(-100.0, 100.0);
62 let v_clamped = v.clamp(-100.0, 100.0);
63
64 let l_normalized = if l_clamped < 50.0 {
67 (l_clamped / 50.0).powf(0.5) * 0.6
69 } else {
70 0.6 + (l_clamped - 50.0) / 50.0 * 0.4
72 };
73
74 let u_normalized = (u_clamped + 100.0) / 200.0;
76 let v_normalized = (v_clamped + 100.0) / 200.0;
77
78 let l_weight = 0.6; let u_weight = 0.2; let v_weight = 0.2; let combined_value =
85 l_normalized * l_weight + u_normalized * u_weight + v_normalized * v_weight;
86
87 (combined_value * 255.0) as u8
89}
90
/// Input options for a single page measurement run.
#[derive(Debug, Clone, Default)]
pub struct WebPageParams {
    /// Address the tab navigates to.
    pub url: String,
    /// Width used for the new tab, the device-metrics override and, when no
    /// `capture_element` is given, the screenshot clip.
    pub width: u32,
    /// Height used for the new tab, the device-metrics override and, when no
    /// `capture_element` is given, the screenshot clip.
    pub height: u32,
    /// User agent override; when `None`, no override is applied and
    /// `accept_language` / `platform` are not used either.
    pub user_agent: Option<String>,
    /// Accept-Language value passed along with `user_agent`.
    pub accept_language: Option<String>,
    /// Platform value passed along with `user_agent`.
    pub platform: Option<String>,
    /// Selector handed to `Tab::wait_for_elements` after navigation. When
    /// `None`, the run waits for the page's `load` lifecycle event instead.
    pub wait_for_elements: Option<String>,
    /// Extra delay slept after the page is considered ready and before the
    /// statistics are collected.
    pub wait: Option<Duration>,
    /// Device scale factor for the device-metrics override; `1.0` when `None`.
    pub device_scale_factor: Option<f64>,
    /// NOTE(review): not read by any code visible in this file section —
    /// confirm where (and whether) this timeout is applied.
    pub timeout: Option<Duration>,
    /// Whether a screenshot should be attempted once the page statistics are
    /// gathered.
    pub capture_screenshot: bool,
    /// Selector of the element to screenshot; when `None`, the region
    /// `(0, 0, width, height)` is captured.
    pub capture_element: Option<String>,
}
106
/// Result of a page measurement run.
#[derive(Debug, Clone, Default)]
pub struct WebPageStat {
    /// Sum of `content_size` over all entries in `resources`.
    pub total_size: u64,
    /// `firstContentfulPaint` timestamp minus `init` timestamp, multiplied by
    /// 1000 (presumably milliseconds — confirm the unit of the lifecycle
    /// timestamps). `0` when either event was not observed.
    pub fcp_time: u32,
    /// `DOMContentLoaded` timestamp minus `init` timestamp, multiplied by
    /// 1000. `0` when either event was not observed.
    pub dcl_time: u32,
    /// `load` timestamp minus `init` timestamp, multiplied by 1000. `0` when
    /// either event was not observed.
    pub load_time: u32,
    /// Page content as returned by `Tab::get_content`; empty if that call
    /// failed.
    pub html: String,
    /// One formatted message per runtime exception event received while the
    /// page was being observed.
    pub exceptions: Vec<String>,
    /// One entry per request id for which a network response was received.
    pub resources: Vec<WebPageResource>,
    /// Screenshot and colour summary; `None` when not requested or when the
    /// capture failed.
    pub screenshot: Option<Screenshot>,
}
118
/// A single network response observed while the page was loading.
#[derive(Debug, Clone, Default)]
pub struct WebPageResource {
    /// Encoded data length reported by the loading-finished event; stays `0`
    /// until that event arrives for this request.
    pub content_size: u64,
    /// Request id from the response-received event; also the key under which
    /// the resource is tracked.
    pub request_id: String,
    /// Status reported by the response.
    pub status: u32,
    /// URL reported by the response.
    pub url: String,
    /// Timing information attached to the response, if any.
    pub timing: Option<ResourceTiming>,
    /// MIME type reported by the response.
    pub mime_type: String,
    /// `connection_reused` flag copied from the response.
    pub connection_reused: bool,
}
129
/// Raw timestamps taken from page lifecycle events. A value of `0.0` means
/// the corresponding event has not been recorded.
#[derive(Debug, Clone, Default)]
pub struct WebPageLifecycle {
    /// Timestamp of the first `init` event seen.
    pub init_time: f64,
    /// Timestamp of the first `firstContentfulPaint` event seen.
    pub fcp_time: f64,
    /// Timestamp of the first `DOMContentLoaded` event seen.
    pub dcl_time: f64,
    /// Timestamp of the most recent `load` event (overwritten on every
    /// `load`, unlike the other fields).
    pub load_time: f64,
}
137
/// A captured screenshot together with a coarse colour distribution.
#[derive(Debug, Clone, Default)]
pub struct Screenshot {
    /// Image bytes exactly as returned by the capture call (requested as PNG).
    pub data: Vec<u8>,
    /// Width of the decoded image.
    pub width: u32,
    /// Height of the decoded image.
    pub height: u32,
    /// `[bucket, percent]` pairs: `bucket` is the value produced by
    /// `luv_to_byte` and `percent` is the share of pixels in that bucket,
    /// rounded up. Buckets holding less than 0.5% of the pixels are omitted.
    pub color_percents: Vec<Vec<u8>>,
}
145
146fn analyze_web_page_screenshot(tab: Arc<Tab>, params: &WebPageParams) -> Result<Screenshot> {
147 let image_data = if let Some(capture_element) = ¶ms.capture_element {
148 tab.wait_for_element(capture_element)
149 .context(HeadlessChromeSnafu)?
150 .capture_screenshot(Page::CaptureScreenshotFormatOption::Png)
151 .context(HeadlessChromeSnafu)?
152 } else {
153 tab.capture_screenshot(
154 Page::CaptureScreenshotFormatOption::Png,
155 Some(90),
156 Some(Page::Viewport {
157 x: 0.0,
158 y: 0.0,
159 width: params.width as f64,
160 height: params.height as f64,
161 scale: 1.0,
162 }),
163 true,
164 )
165 .context(HeadlessChromeSnafu)?
166 };
167
168 let img = image::load_from_memory_with_format(&image_data, image::ImageFormat::Png)
169 .map_err(anyhow::Error::from)
170 .context(HeadlessChromeSnafu)?;
171 let width = img.width();
172 let height = img.height();
173 let mut color_percents = vec![];
174 if let Some(img) = img.as_rgba8() {
175 let luv_list = img
176 .pixels()
177 .map(|pixel| {
178 let rgb = Srgb::new(pixel[0], pixel[1], pixel[2]);
179 let luv: Luv = rgb.into_linear().into_color();
180 luv
181 })
182 .collect::<Vec<_>>();
183 let mut color_count: [u64; 256] = [0; 256];
184 for luv in luv_list.iter() {
185 let value = luv_to_byte(luv);
186 color_count[value as usize] += 1;
187 }
188 let count = luv_list.len() as f64;
189 for (index, item) in color_count.iter().enumerate() {
190 let value = (*item as f64) * 100.0 / count;
191 if value < 0.5 {
192 continue;
193 }
194 let value = value.ceil() as u8;
195 color_percents.push((index, value));
196 }
197 }
198 Ok(Screenshot {
199 data: image_data,
200 width,
201 height,
202 color_percents: color_percents
203 .iter()
204 .map(|item| vec![item.0 as u8, item.1])
205 .collect(),
206 })
207}
208
209pub async fn run_web_page_stat_with_browser(
210 browser: &Browser,
211 params: &WebPageParams,
212) -> Result<WebPageStat> {
213 let tab = browser
214 .new_tab_with_options(CreateTarget {
215 url: "about:blank".to_string(),
216 width: Some(params.width),
217 height: Some(params.height),
218 browser_context_id: None,
219 enable_begin_frame_control: None,
220 new_window: Some(true),
221 background: None,
222 for_tab: None,
223 left: None,
224 top: None,
225 window_state: None,
226 hidden: None,
227 })
228 .context(HeadlessChromeSnafu)?;
229 defer!(let _ = tab.close_with_unload(););
230 if let Some(user_agent) = ¶ms.user_agent {
231 tab.set_user_agent(
232 user_agent,
233 params.accept_language.as_deref(),
234 params.platform.as_deref(),
235 )
236 .context(HeadlessChromeSnafu)?;
237 }
238 tab.call_method(Page::SetDeviceMetricsOverride {
239 width: params.width,
240 height: params.height,
241 device_scale_factor: params.device_scale_factor.unwrap_or(1.0),
242 mobile: true,
243 screen_width: Some(params.width),
244 screen_height: Some(params.height),
245 position_x: None,
246 position_y: None,
247 dont_set_visible_size: None,
248 scale: None,
249 screen_orientation: None,
250 viewport: None,
251 })
252 .context(HeadlessChromeSnafu)?;
253 tab.enable_runtime().context(HeadlessChromeSnafu)?;
254 tab.enable_fetch(None, None).context(HeadlessChromeSnafu)?;
255 tab.call_method(Network::Enable {
256 max_total_buffer_size: None,
257 max_resource_buffer_size: None,
258 max_post_data_size: None,
259 enable_durable_messages: None,
260 report_direct_socket_traffic: None,
261 })
262 .context(HeadlessChromeSnafu)?;
263 let web_page_resources = Arc::new(DashMap::<String, WebPageResource>::new());
264 let web_page_resources_clone = web_page_resources.clone();
265 let exceptions = Arc::new(Mutex::new(Vec::new()));
266 let exceptions_clone = exceptions.clone();
267 let loaded = Arc::new(AtomicBool::new(false));
268 let loaded_clone = loaded.clone();
269 let lifecycle = Arc::new(Mutex::new(WebPageLifecycle::default()));
270 let lifecycle_clone = lifecycle.clone();
271
272 let listener = Arc::new(move |event: &Event| {
273 if let Event::PageLifecycleEvent(lifecycle) = event {
274 let params = &lifecycle.params;
275 match params.name.as_str() {
276 "init" => {
277 if let Ok(mut lifecycle) = lifecycle_clone.lock()
278 && lifecycle.init_time == 0.0
279 {
280 lifecycle.init_time = params.timestamp;
281 }
282 }
283 "load" => {
284 if let Ok(mut lifecycle) = lifecycle_clone.lock() {
285 lifecycle.load_time = params.timestamp;
286 }
287 loaded_clone.store(true, Ordering::SeqCst);
288 }
289 "firstContentfulPaint" => {
290 if let Ok(mut lifecycle) = lifecycle_clone.lock()
291 && lifecycle.fcp_time == 0.0
292 {
293 lifecycle.fcp_time = params.timestamp;
294 }
295 }
296 "DOMContentLoaded" => {
297 if let Ok(mut lifecycle) = lifecycle_clone.lock()
298 && lifecycle.dcl_time == 0.0
299 {
300 lifecycle.dcl_time = params.timestamp;
301 }
302 }
303 _ => {}
304 }
305 return;
306 }
307 if let Event::NetworkResponseReceived(response) = event {
308 let key = response.params.request_id.clone();
309 let timing = response.params.response.timing.clone();
310 web_page_resources_clone.insert(
311 key.clone(),
312 WebPageResource {
313 request_id: key,
314 status: response.params.response.status,
315 url: response.params.response.url.clone(),
316 timing,
317 mime_type: response.params.response.mime_type.clone(),
318 connection_reused: response.params.response.connection_reused,
319 ..Default::default()
320 },
321 );
322 return;
323 }
324 if let Event::NetworkLoadingFinished(response) = event {
325 let key = response.params.request_id.clone();
326 if let Some(mut stat) = web_page_resources_clone.get_mut(&key) {
327 stat.content_size = response.params.encoded_data_length as u64;
328 }
329 return;
330 }
331 if let Event::RuntimeExceptionThrown(exception) = event {
332 let details = &exception.params.exception_details;
333 let mut description = String::new();
334 if let Some(exception) = &details.exception {
335 description = exception.description.clone().unwrap_or_default();
336 }
337 let message = format!(
338 "text: {}, line:{}, column:{}, description:{}",
339 details.text, details.line_number, details.column_number, description
340 );
341 if let Ok(mut exceptions) = exceptions_clone.lock() {
342 exceptions.push(message);
343 }
344 }
345 });
346 tab.add_event_listener(listener)
347 .context(HeadlessChromeSnafu)?;
348 tab.navigate_to(¶ms.url).context(HeadlessChromeSnafu)?;
349 if let Some(wait_for_elements) = ¶ms.wait_for_elements {
350 tab.wait_for_elements(wait_for_elements)
351 .context(HeadlessChromeSnafu)?;
352 } else {
353 Wait::with_timeout(Duration::from_secs(60))
354 .until(|| {
355 if loaded.load(Ordering::SeqCst) {
356 Some(true)
357 } else {
358 None
359 }
360 })
361 .map_err(anyhow::Error::from)
362 .context(HeadlessChromeSnafu)?;
363 }
364 if let Some(wait) = params.wait {
365 tokio::time::sleep(wait).await;
366 }
367
368 let mut stat = WebPageStat::default();
369
370 if let Ok(exceptions) = exceptions.lock() {
371 stat.exceptions = exceptions.clone();
372 }
373 stat.resources = web_page_resources
374 .iter()
375 .map(|item| item.value().clone())
376 .collect();
377 for item in stat.resources.iter() {
378 stat.total_size += item.content_size;
379 }
380 if let Ok(lifecycle) = lifecycle.lock() {
381 if lifecycle.init_time > 0.0 && lifecycle.fcp_time > 0.0 {
382 stat.fcp_time = (1000.0 * (lifecycle.fcp_time - lifecycle.init_time)) as u32;
383 }
384 if lifecycle.init_time > 0.0 && lifecycle.dcl_time > 0.0 {
385 stat.dcl_time = (1000.0 * (lifecycle.dcl_time - lifecycle.init_time)) as u32;
386 }
387 if lifecycle.init_time > 0.0 && lifecycle.load_time > 0.0 {
388 stat.load_time = (1000.0 * (lifecycle.load_time - lifecycle.init_time)) as u32;
389 }
390 }
391
392 if let Ok(document) = tab.get_content() {
393 stat.html = document;
394 }
395
396 if params.capture_screenshot
397 && let Ok(screenshot) = analyze_web_page_screenshot(tab.clone(), params)
398 {
399 stat.screenshot = Some(screenshot);
400 }
401
402 Ok(stat)
403}
404
405pub fn new_browser(cdp: &str, timeout: Option<Duration>) -> Result<Browser> {
406 let browser =
407 Browser::connect_with_timeout(cdp.to_string(), timeout.unwrap_or(Duration::from_secs(120)))
408 .context(HeadlessChromeSnafu)?;
409 Ok(browser)
410}