Skip to main content

tibba_headless/
chrome.rs

1// Copyright 2026 Tree xie.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use super::{Error, HeadlessChromeSnafu};
16use dashmap::DashMap;
17use headless_chrome::Browser;
18use headless_chrome::Tab;
19use headless_chrome::protocol::cdp::Network;
20use headless_chrome::protocol::cdp::Network::ResourceTiming;
21use headless_chrome::protocol::cdp::Page;
22use headless_chrome::protocol::cdp::Target::CreateTarget;
23use headless_chrome::protocol::cdp::types::Event;
24use headless_chrome::util::Wait;
25use palette::{IntoColor, Luv, Srgb};
26use scopeguard::defer;
27use snafu::ResultExt;
28use std::sync::Arc;
29use std::sync::Mutex;
30use std::sync::atomic::{AtomicBool, Ordering};
31use std::time::Duration;
32
33type Result<T> = std::result::Result<T, Error>;
34
35/// 将LUV颜色转换为256个级别,按照人眼视觉区分度划分
36///
37/// 基于CIELUV颜色空间的感知均匀特性,将L、u、v分量映射到256个级别:
38/// - L分量(亮度):分配128个级别,因为人眼对亮度变化最敏感
39/// - u分量(色度):分配64个级别
40/// - v分量(色度):分配64个级别
41///
42/// 这种分配方式考虑了人眼的视觉特性:
43/// - 人眼对亮度变化比色度变化更敏感
44/// - 在低亮度区域,人眼对色度变化更敏感
45/// - 在高亮度区域,人眼对亮度变化更敏感
46fn luv_to_byte(luv: &Luv) -> u8 {
47    // 获取L、u、v分量
48    let l = luv.l;
49    let u = luv.u;
50    let v = luv.v;
51
52    // 处理无效值
53    if l.is_nan() || u.is_nan() || v.is_nan() {
54        return 0;
55    }
56
57    // 限制L值范围到0-100
58    let l_clamped = l.clamp(0.0, 100.0);
59
60    // 限制u、v值到合理范围(通常-100到100)
61    let u_clamped = u.clamp(-100.0, 100.0);
62    let v_clamped = v.clamp(-100.0, 100.0);
63
64    // 使用感知均匀的映射方式
65    // 亮度分量:使用非线性映射,在低亮度区域分配更多级别
66    let l_normalized = if l_clamped < 50.0 {
67        // 低亮度区域:使用平方根映射,分配更多级别
68        (l_clamped / 50.0).powf(0.5) * 0.6
69    } else {
70        // 高亮度区域:使用线性映射
71        0.6 + (l_clamped - 50.0) / 50.0 * 0.4
72    };
73
74    // 色度分量:使用感知均匀的映射
75    let u_normalized = (u_clamped + 100.0) / 200.0;
76    let v_normalized = (v_clamped + 100.0) / 200.0;
77
78    // 组合三个分量到256个级别
79    // 使用加权组合,亮度权重更高
80    let l_weight = 0.6; // 亮度权重60%
81    let u_weight = 0.2; // u色度权重20%
82    let v_weight = 0.2; // v色度权重20%
83
84    let combined_value =
85        l_normalized * l_weight + u_normalized * u_weight + v_normalized * v_weight;
86
87    // 转换为0-255范围
88    (combined_value * 255.0) as u8
89}
90
/// Input describing which page to load and how to measure it.
#[derive(Clone, Debug, Default)]
pub struct WebPageParams {
    /// Address the tab navigates to.
    pub url: String,
    /// Width used for the new window, the device-metrics override and the
    /// full-page screenshot clip.
    pub width: u32,
    /// Height used for the new window, the device-metrics override and the
    /// full-page screenshot clip.
    pub height: u32,
    /// User agent override; when `None`, no override is applied at all.
    pub user_agent: Option<String>,
    /// Accept-Language sent together with `user_agent`; ignored when
    /// `user_agent` is `None`.
    pub accept_language: Option<String>,
    /// Platform sent together with `user_agent`; ignored when `user_agent`
    /// is `None`.
    pub platform: Option<String>,
    /// Selector to wait for after navigation. When `None`, the run waits for
    /// the page's `load` lifecycle event instead (up to 60 seconds).
    pub wait_for_elements: Option<String>,
    /// Extra delay applied after the wait above, before results are collected.
    pub wait: Option<Duration>,
    /// Device scale factor for the metrics override; `1.0` when `None`.
    pub device_scale_factor: Option<f64>,
    /// Not read by `run_web_page_stat_with_browser`; presumably consumed by the
    /// caller (e.g. when connecting to the browser) — TODO confirm.
    pub timeout: Option<Duration>,
    /// Whether a screenshot should be captured and analysed.
    pub capture_screenshot: bool,
    /// Selector of a single element to capture instead of the page viewport.
    pub capture_element: Option<String>,
}
106
107#[derive(Debug, Clone, Default)]
108pub struct WebPageStat {
109    pub total_size: u64,
110    pub fcp_time: u32,
111    pub dcl_time: u32,
112    pub load_time: u32,
113    pub html: String,
114    pub exceptions: Vec<String>,
115    pub resources: Vec<WebPageResource>,
116    pub screenshot: Option<Screenshot>,
117}
118
119#[derive(Debug, Clone, Default)]
120pub struct WebPageResource {
121    pub content_size: u64,
122    pub request_id: String,
123    pub status: u32,
124    pub url: String,
125    pub timing: Option<ResourceTiming>,
126    pub mime_type: String,
127    pub connection_reused: bool,
128}
129
/// Raw timestamps of the page lifecycle events the listener tracks.
///
/// A value of `0.0` means the event has not been seen.
#[derive(Clone, Debug, Default)]
pub struct WebPageLifecycle {
    /// Timestamp of the first `init` event.
    pub init_time: f64,
    /// Timestamp of the first `firstContentfulPaint` event.
    pub fcp_time: f64,
    /// Timestamp of the first `DOMContentLoaded` event.
    pub dcl_time: f64,
    /// Timestamp of the most recent `load` event.
    pub load_time: f64,
}
137
/// A captured screenshot together with a coarse colour summary.
#[derive(Clone, Debug, Default)]
pub struct Screenshot {
    /// PNG bytes exactly as returned by the browser.
    pub data: Vec<u8>,
    /// Width of the decoded image.
    pub width: u32,
    /// Height of the decoded image.
    pub height: u32,
    /// `[bucket, percent]` pairs: the colour bucket index and the share of
    /// pixels in it, rounded up to a whole percent. Buckets holding less than
    /// 0.5 % of the pixels are left out.
    pub color_percents: Vec<Vec<u8>>,
}
145
146fn analyze_web_page_screenshot(tab: Arc<Tab>, params: &WebPageParams) -> Result<Screenshot> {
147    let image_data = if let Some(capture_element) = &params.capture_element {
148        tab.wait_for_element(capture_element)
149            .context(HeadlessChromeSnafu)?
150            .capture_screenshot(Page::CaptureScreenshotFormatOption::Png)
151            .context(HeadlessChromeSnafu)?
152    } else {
153        tab.capture_screenshot(
154            Page::CaptureScreenshotFormatOption::Png,
155            Some(90),
156            Some(Page::Viewport {
157                x: 0.0,
158                y: 0.0,
159                width: params.width as f64,
160                height: params.height as f64,
161                scale: 1.0,
162            }),
163            true,
164        )
165        .context(HeadlessChromeSnafu)?
166    };
167
168    let img = image::load_from_memory_with_format(&image_data, image::ImageFormat::Png)
169        .map_err(anyhow::Error::from)
170        .context(HeadlessChromeSnafu)?;
171    let width = img.width();
172    let height = img.height();
173    let mut color_percents = vec![];
174    if let Some(img) = img.as_rgba8() {
175        let luv_list = img
176            .pixels()
177            .map(|pixel| {
178                let rgb = Srgb::new(pixel[0], pixel[1], pixel[2]);
179                let luv: Luv = rgb.into_linear().into_color();
180                luv
181            })
182            .collect::<Vec<_>>();
183        let mut color_count: [u64; 256] = [0; 256];
184        for luv in luv_list.iter() {
185            let value = luv_to_byte(luv);
186            color_count[value as usize] += 1;
187        }
188        let count = luv_list.len() as f64;
189        for (index, item) in color_count.iter().enumerate() {
190            let value = (*item as f64) * 100.0 / count;
191            if value < 0.5 {
192                continue;
193            }
194            let value = value.ceil() as u8;
195            color_percents.push((index, value));
196        }
197    }
198    Ok(Screenshot {
199        data: image_data,
200        width,
201        height,
202        color_percents: color_percents
203            .iter()
204            .map(|item| vec![item.0 as u8, item.1])
205            .collect(),
206    })
207}
208
209pub async fn run_web_page_stat_with_browser(
210    browser: &Browser,
211    params: &WebPageParams,
212) -> Result<WebPageStat> {
213    let tab = browser
214        .new_tab_with_options(CreateTarget {
215            url: "about:blank".to_string(),
216            width: Some(params.width),
217            height: Some(params.height),
218            browser_context_id: None,
219            enable_begin_frame_control: None,
220            new_window: Some(true),
221            background: None,
222            for_tab: None,
223            left: None,
224            top: None,
225            window_state: None,
226            hidden: None,
227        })
228        .context(HeadlessChromeSnafu)?;
229    defer!(let _ = tab.close_with_unload(););
230    if let Some(user_agent) = &params.user_agent {
231        tab.set_user_agent(
232            user_agent,
233            params.accept_language.as_deref(),
234            params.platform.as_deref(),
235        )
236        .context(HeadlessChromeSnafu)?;
237    }
238    tab.call_method(Page::SetDeviceMetricsOverride {
239        width: params.width,
240        height: params.height,
241        device_scale_factor: params.device_scale_factor.unwrap_or(1.0),
242        mobile: true,
243        screen_width: Some(params.width),
244        screen_height: Some(params.height),
245        position_x: None,
246        position_y: None,
247        dont_set_visible_size: None,
248        scale: None,
249        screen_orientation: None,
250        viewport: None,
251    })
252    .context(HeadlessChromeSnafu)?;
253    tab.enable_runtime().context(HeadlessChromeSnafu)?;
254    tab.enable_fetch(None, None).context(HeadlessChromeSnafu)?;
255    tab.call_method(Network::Enable {
256        max_total_buffer_size: None,
257        max_resource_buffer_size: None,
258        max_post_data_size: None,
259        enable_durable_messages: None,
260        report_direct_socket_traffic: None,
261    })
262    .context(HeadlessChromeSnafu)?;
263    let web_page_resources = Arc::new(DashMap::<String, WebPageResource>::new());
264    let web_page_resources_clone = web_page_resources.clone();
265    let exceptions = Arc::new(Mutex::new(Vec::new()));
266    let exceptions_clone = exceptions.clone();
267    let loaded = Arc::new(AtomicBool::new(false));
268    let loaded_clone = loaded.clone();
269    let lifecycle = Arc::new(Mutex::new(WebPageLifecycle::default()));
270    let lifecycle_clone = lifecycle.clone();
271
272    let listener = Arc::new(move |event: &Event| {
273        if let Event::PageLifecycleEvent(lifecycle) = event {
274            let params = &lifecycle.params;
275            match params.name.as_str() {
276                "init" => {
277                    if let Ok(mut lifecycle) = lifecycle_clone.lock()
278                        && lifecycle.init_time == 0.0
279                    {
280                        lifecycle.init_time = params.timestamp;
281                    }
282                }
283                "load" => {
284                    if let Ok(mut lifecycle) = lifecycle_clone.lock() {
285                        lifecycle.load_time = params.timestamp;
286                    }
287                    loaded_clone.store(true, Ordering::SeqCst);
288                }
289                "firstContentfulPaint" => {
290                    if let Ok(mut lifecycle) = lifecycle_clone.lock()
291                        && lifecycle.fcp_time == 0.0
292                    {
293                        lifecycle.fcp_time = params.timestamp;
294                    }
295                }
296                "DOMContentLoaded" => {
297                    if let Ok(mut lifecycle) = lifecycle_clone.lock()
298                        && lifecycle.dcl_time == 0.0
299                    {
300                        lifecycle.dcl_time = params.timestamp;
301                    }
302                }
303                _ => {}
304            }
305            return;
306        }
307        if let Event::NetworkResponseReceived(response) = event {
308            let key = response.params.request_id.clone();
309            let timing = response.params.response.timing.clone();
310            web_page_resources_clone.insert(
311                key.clone(),
312                WebPageResource {
313                    request_id: key,
314                    status: response.params.response.status,
315                    url: response.params.response.url.clone(),
316                    timing,
317                    mime_type: response.params.response.mime_type.clone(),
318                    connection_reused: response.params.response.connection_reused,
319                    ..Default::default()
320                },
321            );
322            return;
323        }
324        if let Event::NetworkLoadingFinished(response) = event {
325            let key = response.params.request_id.clone();
326            if let Some(mut stat) = web_page_resources_clone.get_mut(&key) {
327                stat.content_size = response.params.encoded_data_length as u64;
328            }
329            return;
330        }
331        if let Event::RuntimeExceptionThrown(exception) = event {
332            let details = &exception.params.exception_details;
333            let mut description = String::new();
334            if let Some(exception) = &details.exception {
335                description = exception.description.clone().unwrap_or_default();
336            }
337            let message = format!(
338                "text: {}, line:{}, column:{}, description:{}",
339                details.text, details.line_number, details.column_number, description
340            );
341            if let Ok(mut exceptions) = exceptions_clone.lock() {
342                exceptions.push(message);
343            }
344        }
345    });
346    tab.add_event_listener(listener)
347        .context(HeadlessChromeSnafu)?;
348    tab.navigate_to(&params.url).context(HeadlessChromeSnafu)?;
349    if let Some(wait_for_elements) = &params.wait_for_elements {
350        tab.wait_for_elements(wait_for_elements)
351            .context(HeadlessChromeSnafu)?;
352    } else {
353        Wait::with_timeout(Duration::from_secs(60))
354            .until(|| {
355                if loaded.load(Ordering::SeqCst) {
356                    Some(true)
357                } else {
358                    None
359                }
360            })
361            .map_err(anyhow::Error::from)
362            .context(HeadlessChromeSnafu)?;
363    }
364    if let Some(wait) = params.wait {
365        tokio::time::sleep(wait).await;
366    }
367
368    let mut stat = WebPageStat::default();
369
370    if let Ok(exceptions) = exceptions.lock() {
371        stat.exceptions = exceptions.clone();
372    }
373    stat.resources = web_page_resources
374        .iter()
375        .map(|item| item.value().clone())
376        .collect();
377    for item in stat.resources.iter() {
378        stat.total_size += item.content_size;
379    }
380    if let Ok(lifecycle) = lifecycle.lock() {
381        if lifecycle.init_time > 0.0 && lifecycle.fcp_time > 0.0 {
382            stat.fcp_time = (1000.0 * (lifecycle.fcp_time - lifecycle.init_time)) as u32;
383        }
384        if lifecycle.init_time > 0.0 && lifecycle.dcl_time > 0.0 {
385            stat.dcl_time = (1000.0 * (lifecycle.dcl_time - lifecycle.init_time)) as u32;
386        }
387        if lifecycle.init_time > 0.0 && lifecycle.load_time > 0.0 {
388            stat.load_time = (1000.0 * (lifecycle.load_time - lifecycle.init_time)) as u32;
389        }
390    }
391
392    if let Ok(document) = tab.get_content() {
393        stat.html = document;
394    }
395
396    if params.capture_screenshot
397        && let Ok(screenshot) = analyze_web_page_screenshot(tab.clone(), params)
398    {
399        stat.screenshot = Some(screenshot);
400    }
401
402    Ok(stat)
403}
404
405pub fn new_browser(cdp: &str, timeout: Option<Duration>) -> Result<Browser> {
406    let browser =
407        Browser::connect_with_timeout(cdp.to_string(), timeout.unwrap_or(Duration::from_secs(120)))
408            .context(HeadlessChromeSnafu)?;
409    Ok(browser)
410}