scrapling_browser/engine.rs
1//! Low-level engine helpers for launching the Playwright driver and building
2//! Chromium launch options.
3//!
4//! This module sits between the high-level session types in [`crate::fetcher`] and the
5//! raw Playwright Rust bindings. It has two public functions:
6//!
7//! - [`build_launch_options`] -- assembles the full set of Chromium CLI flags, proxy
8//! settings, executable path, and channel from a [`BrowserConfig`], merging in
9//! stealth flags when requested and filtering out any harmful automation-revealing
10//! arguments.
11//!
12//! - [`launch_playwright`] -- starts the Playwright driver process. This must be
13//! called once before any browser can be launched or connected to.
14//!
15//! You typically do not call these functions directly; [`DynamicSession::start`] and
16//! [`StealthySession::start`] use them internally.
17
18use playwright_rs::Playwright;
19use playwright_rs::protocol::ProxySettings;
20use tracing::info;
21
22use crate::config::BrowserConfig;
23use crate::constants::{STEALTH_ARGS, build_args, filter_harmful_args};
24use crate::error::Result;
25
26/// Build Playwright launch options from the given browser configuration and stealth flags.
27///
28/// This function constructs the complete argument list by combining default args,
29/// user-supplied `extra_flags`, stealth args (when `stealth` is `true`), and any
30/// additional stealth args produced by [`StealthConfig::extra_stealth_args`]. It then
31/// strips out harmful flags that would reveal automation (see [`filter_harmful_args`]).
32/// Proxy, executable path, channel, and timeout settings from the config are also applied.
33pub fn build_launch_options(
34 config: &BrowserConfig,
35 stealth: bool,
36 extra_stealth_args: &[String],
37) -> playwright_rs::LaunchOptions {
38 let mut args = build_args(stealth);
39 args.extend(config.extra_flags.clone());
40
41 if stealth {
42 args.extend(STEALTH_ARGS.iter().map(|s| s.to_string()));
43 }
44 args.extend(extra_stealth_args.iter().cloned());
45
46 if config.dns_over_https {
47 args.push("--dns-over-https-templates=https://1.1.1.1/dns-query".into());
48 }
49
50 let args = filter_harmful_args(&args);
51
52 let mut opts = playwright_rs::LaunchOptions::new()
53 .headless(config.headless)
54 .args(args)
55 .timeout(config.timeout_ms);
56
57 if let Some(ref path) = config.executable_path {
58 opts = opts.executable_path(path.clone());
59 }
60
61 if let Some(ref proxy) = config.proxy {
62 let ps = ProxySettings {
63 server: proxy.server.clone(),
64 bypass: None,
65 username: proxy.username.clone(),
66 password: proxy.password.clone(),
67 };
68 opts = opts.proxy(ps);
69 }
70
71 if config.real_chrome {
72 opts = opts.channel("chrome".into());
73 }
74
75 opts
76}
77
78/// Launch and return a new Playwright driver instance.
79///
80/// This starts the Playwright Node.js server process that manages browser lifecycle
81/// and CDP communication. It must succeed before you can launch or connect to any
82/// browser. The returned [`Playwright`] handle should be kept alive for the duration
83/// of the session.
84pub async fn launch_playwright() -> Result<Playwright> {
85 info!("launching Playwright");
86 Playwright::launch()
87 .await
88 .map_err(|e| crate::error::BrowserError::Playwright(e.to_string()))
89}