Skip to main content

scrapling_browser/
engine.rs

1//! Low-level engine helpers for launching the Playwright driver and building
2//! Chromium launch options.
3//!
4//! This module sits between the high-level session types in [`crate::fetcher`] and the
5//! raw Playwright Rust bindings. It has two public functions:
6//!
7//! - [`build_launch_options`] -- assembles the full set of Chromium CLI flags, proxy
8//!   settings, executable path, and channel from a [`BrowserConfig`], merging in
9//!   stealth flags when requested and filtering out any harmful automation-revealing
10//!   arguments.
11//!
12//! - [`launch_playwright`] -- starts the Playwright driver process. This must be
13//!   called once before any browser can be launched or connected to.
14//!
15//! You typically do not call these functions directly; [`DynamicSession::start`] and
16//! [`StealthySession::start`] use them internally.
17
18use playwright_rs::Playwright;
19use playwright_rs::protocol::ProxySettings;
20use tracing::info;
21
22use crate::config::BrowserConfig;
23use crate::constants::{STEALTH_ARGS, build_args, filter_harmful_args};
24use crate::error::Result;
25
26/// Build Playwright launch options from the given browser configuration and stealth flags.
27///
28/// This function constructs the complete argument list by combining default args,
29/// user-supplied `extra_flags`, stealth args (when `stealth` is `true`), and any
30/// additional stealth args produced by [`StealthConfig::extra_stealth_args`]. It then
31/// strips out harmful flags that would reveal automation (see [`filter_harmful_args`]).
32/// Proxy, executable path, channel, and timeout settings from the config are also applied.
33pub fn build_launch_options(
34    config: &BrowserConfig,
35    stealth: bool,
36    extra_stealth_args: &[String],
37) -> playwright_rs::LaunchOptions {
38    let mut args = build_args(stealth);
39    args.extend(config.extra_flags.clone());
40
41    if stealth {
42        args.extend(STEALTH_ARGS.iter().map(|s| s.to_string()));
43    }
44    args.extend(extra_stealth_args.iter().cloned());
45
46    if config.dns_over_https {
47        args.push("--dns-over-https-templates=https://1.1.1.1/dns-query".into());
48    }
49
50    let args = filter_harmful_args(&args);
51
52    let mut opts = playwright_rs::LaunchOptions::new()
53        .headless(config.headless)
54        .args(args)
55        .timeout(config.timeout_ms);
56
57    if let Some(ref path) = config.executable_path {
58        opts = opts.executable_path(path.clone());
59    }
60
61    if let Some(ref proxy) = config.proxy {
62        let ps = ProxySettings {
63            server: proxy.server.clone(),
64            bypass: None,
65            username: proxy.username.clone(),
66            password: proxy.password.clone(),
67        };
68        opts = opts.proxy(ps);
69    }
70
71    if config.real_chrome {
72        opts = opts.channel("chrome".into());
73    }
74
75    opts
76}
77
78/// Launch and return a new Playwright driver instance.
79///
80/// This starts the Playwright Node.js server process that manages browser lifecycle
81/// and CDP communication. It must succeed before you can launch or connect to any
82/// browser. The returned [`Playwright`] handle should be kept alive for the duration
83/// of the session.
84pub async fn launch_playwright() -> Result<Playwright> {
85    info!("launching Playwright");
86    Playwright::launch()
87        .await
88        .map_err(|e| crate::error::BrowserError::Playwright(e.to_string()))
89}