tibba-headless 0.2.2

Headless browser for tibba
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
// Copyright 2026 Tree xie.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{Error, HeadlessChromeSnafu};
use dashmap::DashMap;
use headless_chrome::Browser;
use headless_chrome::Tab;
use headless_chrome::protocol::cdp::Network;
use headless_chrome::protocol::cdp::Network::ResourceTiming;
use headless_chrome::protocol::cdp::Page;
use headless_chrome::protocol::cdp::Target::CreateTarget;
use headless_chrome::protocol::cdp::types::Event;
use headless_chrome::util::Wait;
use palette::{IntoColor, Luv, Srgb};
use scopeguard::defer;
use snafu::ResultExt;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;

/// Module-local result alias whose error type is the parent module's [`Error`].
type Result<T> = std::result::Result<T, Error>;

/// Collapses a CIELUV color into a single byte bucket in `0..=255`.
///
/// Each component is normalized to `0.0..=1.0` and the three results are
/// blended with fixed weights before being scaled to the byte range:
/// - `l` (lightness) is clamped to `0..=100` and contributes 60%. Below 50 it
///   follows a square-root curve, which spreads dark tones over more buckets;
///   from 50 upward the mapping is linear.
/// - `u` and `v` (chromaticity) are each clamped to `-100..=100`, mapped
///   linearly, and contribute 20% apiece.
///
/// Returns `0` if any component is NaN.
fn luv_to_byte(luv: &Luv) -> u8 {
    // NaN cannot be ordered or clamped meaningfully, so bail out first.
    if luv.l.is_nan() || luv.u.is_nan() || luv.v.is_nan() {
        return 0;
    }

    // Lightness: linear in the bright half, square-root in the dark half.
    let lightness = luv.l.clamp(0.0, 100.0);
    let lightness_unit = if lightness >= 50.0 {
        0.6 + (lightness - 50.0) / 50.0 * 0.4
    } else {
        (lightness / 50.0).powf(0.5) * 0.6
    };

    // Chromaticity: shift the clamped -100..=100 range onto 0..=1.
    let u_unit = (luv.u.clamp(-100.0, 100.0) + 100.0) / 200.0;
    let v_unit = (luv.v.clamp(-100.0, 100.0) + 100.0) / 200.0;

    // Weighted blend (60% lightness, 20% u, 20% v), then scale to a byte.
    let blended = lightness_unit * 0.6 + u_unit * 0.2 + v_unit * 0.2;
    (blended * 255.0) as u8
}

/// Input options for a single page measurement run.
#[derive(Debug, Clone, Default)]
pub struct WebPageParams {
    /// Address the tab navigates to.
    pub url: String,
    /// Width used for the new tab, the device metrics override, and the screenshot clip.
    pub width: u32,
    /// Height used for the new tab, the device metrics override, and the screenshot clip.
    pub height: u32,
    /// User agent override; when `None` no override is applied at all.
    pub user_agent: Option<String>,
    /// Accept-Language value sent together with `user_agent` (ignored when `user_agent` is `None`).
    pub accept_language: Option<String>,
    /// Platform value sent together with `user_agent` (ignored when `user_agent` is `None`).
    pub platform: Option<String>,
    /// Selector to wait for after navigation; when `None` the run waits for the `load` event instead.
    pub wait_for_elements: Option<String>,
    /// Extra delay applied after the page is considered ready and before results are collected.
    pub wait: Option<Duration>,
    /// Device scale factor for the metrics override; `1.0` is used when `None`.
    pub device_scale_factor: Option<f64>,
    /// NOTE(review): not read anywhere in the visible part of this file — confirm where it is consumed.
    pub timeout: Option<Duration>,
    /// Whether a screenshot should be captured and analyzed.
    pub capture_screenshot: bool,
    /// Selector of a single element to capture instead of the viewport.
    pub capture_element: Option<String>,
}

/// Result of a page measurement run.
#[derive(Debug, Clone, Default)]
pub struct WebPageStat {
    /// Sum of `content_size` over all entries in `resources`.
    pub total_size: u64,
    /// Delta between the `init` and `firstContentfulPaint` lifecycle timestamps, scaled by 1000
    /// (milliseconds, assuming the timestamps are in seconds); `0` if either was not observed.
    pub fcp_time: u32,
    /// Delta between the `init` and `DOMContentLoaded` lifecycle timestamps, scaled by 1000;
    /// `0` if either was not observed.
    pub dcl_time: u32,
    /// Delta between the `init` and `load` lifecycle timestamps, scaled by 1000;
    /// `0` if either was not observed.
    pub load_time: u32,
    /// Page content as returned by the tab; left empty if it could not be read.
    pub html: String,
    /// One formatted message per runtime exception reported while the page was open.
    pub exceptions: Vec<String>,
    /// Every network response recorded while the page was open.
    pub resources: Vec<WebPageResource>,
    /// Screenshot and color summary; `None` if not requested or if capturing failed.
    pub screenshot: Option<Screenshot>,
}

/// A single network response observed while the page was loading.
#[derive(Debug, Clone, Default)]
pub struct WebPageResource {
    /// Encoded data length reported by the loading-finished event; `0` until that event arrives.
    pub content_size: u64,
    /// Request identifier from the network event; used to pair response and loading-finished events.
    pub request_id: String,
    /// Response status code.
    pub status: u32,
    /// Response URL.
    pub url: String,
    /// Timing details attached to the response, when the browser provides them.
    pub timing: Option<ResourceTiming>,
    /// MIME type reported for the response.
    pub mime_type: String,
    /// Whether the response reported a reused connection.
    pub connection_reused: bool,
}

/// Raw timestamps taken from page lifecycle events; `0.0` means "not observed yet".
#[derive(Debug, Clone, Default)]
pub struct WebPageLifecycle {
    /// Timestamp of the first `init` event.
    pub init_time: f64,
    /// Timestamp of the first `firstContentfulPaint` event.
    pub fcp_time: f64,
    /// Timestamp of the first `DOMContentLoaded` event.
    pub dcl_time: f64,
    /// Timestamp of the most recent `load` event.
    pub load_time: f64,
}

/// A captured screenshot together with a coarse summary of its colors.
#[derive(Debug, Clone, Default)]
pub struct Screenshot {
    /// PNG-encoded image bytes as returned by the browser.
    pub data: Vec<u8>,
    /// Width of the decoded image in pixels.
    pub width: u32,
    /// Height of the decoded image in pixels.
    pub height: u32,
    /// `[bucket, percent]` pairs: `bucket` is the byte produced by `luv_to_byte` and `percent`
    /// is the share of pixels in that bucket, rounded up. Buckets below 0.5% are omitted.
    pub color_percents: Vec<Vec<u8>>,
}

/// Captures a PNG screenshot and summarizes which color buckets dominate it.
///
/// When `params.capture_element` is set, the function waits for that selector and
/// captures only the matching element. Otherwise it captures a
/// `params.width` x `params.height` clip starting at the page origin.
///
/// Every pixel is converted from sRGB to CIELUV and bucketed with `luv_to_byte`.
/// The result lists, for each bucket holding at least 0.5% of the pixels, the
/// bucket index and its share of pixels rounded up to a whole percent.
///
/// # Errors
///
/// Returns an error if the element cannot be found, the capture fails, or the
/// returned bytes cannot be decoded as PNG.
fn analyze_web_page_screenshot(tab: Arc<Tab>, params: &WebPageParams) -> Result<Screenshot> {
    let image_data = if let Some(capture_element) = &params.capture_element {
        tab.wait_for_element(capture_element)
            .context(HeadlessChromeSnafu)?
            .capture_screenshot(Page::CaptureScreenshotFormatOption::Png)
            .context(HeadlessChromeSnafu)?
    } else {
        tab.capture_screenshot(
            Page::CaptureScreenshotFormatOption::Png,
            Some(90),
            Some(Page::Viewport {
                x: 0.0,
                y: 0.0,
                width: params.width as f64,
                height: params.height as f64,
                scale: 1.0,
            }),
            true,
        )
        .context(HeadlessChromeSnafu)?
    };

    let img = image::load_from_memory_with_format(&image_data, image::ImageFormat::Png)
        .map_err(anyhow::Error::from)
        .context(HeadlessChromeSnafu)?;
    let width = img.width();
    let height = img.height();

    // Convert rather than borrow with `as_rgba8()`: a PNG decoded as RGB, grayscale
    // or 16-bit would otherwise yield `None` and silently produce no color summary.
    // An image that is already RGBA8 is moved, not copied.
    let rgba = img.into_rgba8();

    // Count pixels per bucket in a single pass instead of materializing every
    // converted color first.
    let mut color_count = [0u64; 256];
    for pixel in rgba.pixels() {
        let rgb = Srgb::new(pixel[0], pixel[1], pixel[2]);
        let luv: Luv = rgb.into_linear().into_color();
        color_count[usize::from(luv_to_byte(&luv))] += 1;
    }

    let pixel_count: u64 = color_count.iter().sum();
    let mut color_percents = Vec::new();
    // Guard against 0/0 for an image without pixels.
    if pixel_count > 0 {
        let total = pixel_count as f64;
        for (index, count) in color_count.iter().enumerate() {
            let percent = (*count as f64) * 100.0 / total;
            if percent < 0.5 {
                continue;
            }
            // `index` is always below 256, so the narrowing cast is lossless.
            color_percents.push(vec![index as u8, percent.ceil() as u8]);
        }
    }

    Ok(Screenshot {
        data: image_data,
        width,
        height,
        color_percents,
    })
}

/// Loads `params.url` in a fresh tab of `browser` and collects load statistics.
///
/// Steps, in order:
/// 1. Open a new tab on `about:blank` and apply the optional user agent override
///    and the device metrics override.
/// 2. Enable the runtime, fetch and network domains and register an event
///    listener that records lifecycle timestamps, network responses and runtime
///    exceptions.
/// 3. Navigate, then wait either for `params.wait_for_elements` or for the
///    `load` lifecycle event, followed by the optional `params.wait` delay.
/// 4. Assemble the collected data, the page content and (if requested) a
///    screenshot into a [`WebPageStat`].
///
/// The tab is closed when the function returns, on both success and error paths.
///
/// # Errors
///
/// Returns an error if creating or configuring the tab fails, navigation fails,
/// or the wait step fails or times out. Failures while reading the page content
/// or taking the screenshot are ignored and leave the corresponding field at its
/// default.
///
/// NOTE(review): `params.timeout` is not read in this function; the wait for the
/// `load` event is fixed at 60 seconds — confirm this is intentional.
pub async fn run_web_page_stat_with_browser(
    browser: &Browser,
    params: &WebPageParams,
) -> Result<WebPageStat> {
    let tab = browser
        .new_tab_with_options(CreateTarget {
            url: "about:blank".to_string(),
            width: Some(params.width),
            height: Some(params.height),
            browser_context_id: None,
            enable_begin_frame_control: None,
            new_window: Some(true),
            background: None,
            for_tab: None,
            left: None,
            top: None,
            window_state: None,
            hidden: None,
        })
        .context(HeadlessChromeSnafu)?;
    // Close the tab when this scope exits; a failure to close is deliberately ignored.
    defer!(let _ = tab.close_with_unload(););
    // Accept-Language and platform are only applied together with a user agent.
    if let Some(user_agent) = &params.user_agent {
        tab.set_user_agent(
            user_agent,
            params.accept_language.as_deref(),
            params.platform.as_deref(),
        )
        .context(HeadlessChromeSnafu)?;
    }
    // Mobile emulation is always enabled; the scale factor falls back to 1.0.
    tab.call_method(Page::SetDeviceMetricsOverride {
        width: params.width,
        height: params.height,
        device_scale_factor: params.device_scale_factor.unwrap_or(1.0),
        mobile: true,
        screen_width: Some(params.width),
        screen_height: Some(params.height),
        position_x: None,
        position_y: None,
        dont_set_visible_size: None,
        scale: None,
        screen_orientation: None,
        viewport: None,
    })
    .context(HeadlessChromeSnafu)?;
    tab.enable_runtime().context(HeadlessChromeSnafu)?;
    tab.enable_fetch(None, None).context(HeadlessChromeSnafu)?;
    tab.call_method(Network::Enable {
        max_total_buffer_size: None,
        max_resource_buffer_size: None,
        max_post_data_size: None,
        enable_durable_messages: None,
        report_direct_socket_traffic: None,
    })
    .context(HeadlessChromeSnafu)?;
    // Shared state: each `*_clone` handle is moved into the listener, the
    // original stays here to be read once the page is ready.
    let web_page_resources = Arc::new(DashMap::<String, WebPageResource>::new());
    let web_page_resources_clone = web_page_resources.clone();
    let exceptions = Arc::new(Mutex::new(Vec::new()));
    let exceptions_clone = exceptions.clone();
    let loaded = Arc::new(AtomicBool::new(false));
    let loaded_clone = loaded.clone();
    let lifecycle = Arc::new(Mutex::new(WebPageLifecycle::default()));
    let lifecycle_clone = lifecycle.clone();

    let listener = Arc::new(move |event: &Event| {
        // Lifecycle events: `init`, `firstContentfulPaint` and `DOMContentLoaded`
        // keep only the first timestamp seen; `load` is overwritten on every
        // occurrence and also raises the `loaded` flag. A poisoned lock is skipped.
        if let Event::PageLifecycleEvent(lifecycle) = event {
            let params = &lifecycle.params;
            match params.name.as_str() {
                "init" => {
                    if let Ok(mut lifecycle) = lifecycle_clone.lock()
                        && lifecycle.init_time == 0.0
                    {
                        lifecycle.init_time = params.timestamp;
                    }
                }
                "load" => {
                    if let Ok(mut lifecycle) = lifecycle_clone.lock() {
                        lifecycle.load_time = params.timestamp;
                    }
                    loaded_clone.store(true, Ordering::SeqCst);
                }
                "firstContentfulPaint" => {
                    if let Ok(mut lifecycle) = lifecycle_clone.lock()
                        && lifecycle.fcp_time == 0.0
                    {
                        lifecycle.fcp_time = params.timestamp;
                    }
                }
                "DOMContentLoaded" => {
                    if let Ok(mut lifecycle) = lifecycle_clone.lock()
                        && lifecycle.dcl_time == 0.0
                    {
                        lifecycle.dcl_time = params.timestamp;
                    }
                }
                _ => {}
            }
            return;
        }
        // Response headers received: record the resource under its request id.
        // `content_size` stays at its default until loading finishes.
        if let Event::NetworkResponseReceived(response) = event {
            let key = response.params.request_id.clone();
            let timing = response.params.response.timing.clone();
            web_page_resources_clone.insert(
                key.clone(),
                WebPageResource {
                    request_id: key,
                    status: response.params.response.status,
                    url: response.params.response.url.clone(),
                    timing,
                    mime_type: response.params.response.mime_type.clone(),
                    connection_reused: response.params.response.connection_reused,
                    ..Default::default()
                },
            );
            return;
        }
        // Loading finished: fill in the size for a resource recorded earlier.
        // Requests without a recorded response are ignored.
        if let Event::NetworkLoadingFinished(response) = event {
            let key = response.params.request_id.clone();
            if let Some(mut stat) = web_page_resources_clone.get_mut(&key) {
                stat.content_size = response.params.encoded_data_length as u64;
            }
            return;
        }
        // Runtime exception: store a one-line summary; the description is empty
        // when the event carries no exception object or no description.
        if let Event::RuntimeExceptionThrown(exception) = event {
            let details = &exception.params.exception_details;
            let mut description = String::new();
            if let Some(exception) = &details.exception {
                description = exception.description.clone().unwrap_or_default();
            }
            let message = format!(
                "text: {}, line:{}, column:{}, description:{}",
                details.text, details.line_number, details.column_number, description
            );
            if let Ok(mut exceptions) = exceptions_clone.lock() {
                exceptions.push(message);
            }
        }
    });
    // The listener is registered before navigating so that events from the
    // navigation itself are captured.
    tab.add_event_listener(listener)
        .context(HeadlessChromeSnafu)?;
    tab.navigate_to(&params.url).context(HeadlessChromeSnafu)?;
    // Readiness: an explicit selector takes precedence; otherwise wait up to
    // 60 seconds for the `load` flag set by the listener.
    if let Some(wait_for_elements) = &params.wait_for_elements {
        tab.wait_for_elements(wait_for_elements)
            .context(HeadlessChromeSnafu)?;
    } else {
        Wait::with_timeout(Duration::from_secs(60))
            .until(|| {
                if loaded.load(Ordering::SeqCst) {
                    Some(true)
                } else {
                    None
                }
            })
            .map_err(anyhow::Error::from)
            .context(HeadlessChromeSnafu)?;
    }
    // Optional settle time, e.g. for late requests to show up in the statistics.
    if let Some(wait) = params.wait {
        tokio::time::sleep(wait).await;
    }

    let mut stat = WebPageStat::default();

    // Copy what the listener has gathered so far; a poisoned lock leaves the
    // field at its default.
    if let Ok(exceptions) = exceptions.lock() {
        stat.exceptions = exceptions.clone();
    }
    stat.resources = web_page_resources
        .iter()
        .map(|item| item.value().clone())
        .collect();
    for item in stat.resources.iter() {
        stat.total_size += item.content_size;
    }
    // Timings are reported relative to `init` and scaled by 1000 (presumably
    // seconds to milliseconds — confirm against the event's timestamp unit).
    // A metric stays 0 unless both timestamps were observed.
    if let Ok(lifecycle) = lifecycle.lock() {
        if lifecycle.init_time > 0.0 && lifecycle.fcp_time > 0.0 {
            stat.fcp_time = (1000.0 * (lifecycle.fcp_time - lifecycle.init_time)) as u32;
        }
        if lifecycle.init_time > 0.0 && lifecycle.dcl_time > 0.0 {
            stat.dcl_time = (1000.0 * (lifecycle.dcl_time - lifecycle.init_time)) as u32;
        }
        if lifecycle.init_time > 0.0 && lifecycle.load_time > 0.0 {
            stat.load_time = (1000.0 * (lifecycle.load_time - lifecycle.init_time)) as u32;
        }
    }

    // Best effort: a failure to read the content leaves `html` empty.
    if let Ok(document) = tab.get_content() {
        stat.html = document;
    }

    // Best effort: a failed screenshot leaves `screenshot` as `None`.
    if params.capture_screenshot
        && let Ok(screenshot) = analyze_web_page_screenshot(tab.clone(), params)
    {
        stat.screenshot = Some(screenshot);
    }

    Ok(stat)
}

/// Connects to an already running browser and returns a handle to it.
///
/// `cdp` is the address handed to `Browser::connect_with_timeout`
/// (presumably the DevTools WebSocket URL — confirm against callers).
/// `timeout` falls back to 120 seconds when `None`.
///
/// # Errors
///
/// Returns an error if the connection cannot be established.
pub fn new_browser(cdp: &str, timeout: Option<Duration>) -> Result<Browser> {
    let connect_timeout = timeout.unwrap_or(Duration::from_secs(120));
    Browser::connect_with_timeout(cdp.to_string(), connect_timeout).context(HeadlessChromeSnafu)
}