Skip to main content

chaser_oxide/browser/
config.rs

1use std::time::Duration;
2use std::{
3    collections::HashMap,
4    io,
5    path::{Path, PathBuf},
6};
7
8use super::argument::{Arg, ArgConst, ArgsBuilder};
9use crate::async_process::{self, Child, Stdio};
10use crate::detection::{self, DetectionOptions};
11use crate::handler::viewport::Viewport;
12use crate::handler::REQUEST_TIMEOUT;
13
/// Default time budget for `Browser::launch`, expressed in milliseconds
/// (20 seconds).
pub const LAUNCH_TIMEOUT: u64 = 20 * 1_000;
16
/// Selects whether the browser runs with a visible window and, if not, which
/// headless implementation is requested.
///
/// The derived [`Default`] is [`HeadlessMode::True`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HeadlessMode {
    /// The "headful" mode: the browser runs with its regular window.
    False,
    /// The old headless mode.
    #[default]
    True,
    /// The new headless mode.
    /// See also: <https://developer.chrome.com/docs/chromium/new-headless>
    New,
}
27
28#[derive(Debug, Clone)]
29pub struct BrowserConfig {
30    /// Determines whether to run headless version of the browser. Defaults to
31    /// true.
32    pub(crate) headless: HeadlessMode,
33
34    /// Determines whether to run the browser with a sandbox.
35    pub(crate) sandbox: bool,
36
37    /// Launch the browser with a specific window width and height.
38    pub(crate) window_size: Option<(u32, u32)>,
39
40    /// Launch the browser with a specific debugging port.
41    pub(crate) port: u16,
42
43    /// Path for Chrome or Chromium.
44    ///
45    /// If unspecified, the create will try to automatically detect a suitable
46    /// binary.
47    pub(crate) executable: std::path::PathBuf,
48
49    /// A list of Chrome extensions to load.
50    ///
51    /// An extension should be a path to a folder containing the extension code.
52    /// CRX files cannot be used directly and must be first extracted.
53    ///
54    /// Note that Chrome does not support loading extensions in headless-mode.
55    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
56    pub(crate) extensions: Vec<String>,
57
58    /// Environment variables to set for the Chromium process.
59    /// Passes value through to std::process::Command::envs.
60    pub process_envs: Option<HashMap<String, String>>,
61
62    /// Data dir for user data
63    pub user_data_dir: Option<PathBuf>,
64
65    /// Whether to launch the `Browser` in incognito mode
66    pub(crate) incognito: bool,
67
68    /// Timeout duration for `Browser::launch`.
69    pub(crate) launch_timeout: Duration,
70
71    /// Ignore https errors, default is true
72    pub(crate) ignore_https_errors: bool,
73
74    /// Ignore invalid messages, default is true
75    pub(crate) ignore_invalid_messages: bool,
76
77    /// Disable HTTPS-first features (HttpsUpgrades, HttpsFirstBalancedModeAutoEnable)
78    pub(crate) disable_https_first: bool,
79
80    /// The viewport of the browser
81    pub(crate) viewport: Option<Viewport>,
82
83    /// The duration after a request with no response should time out
84    pub(crate) request_timeout: Duration,
85
86    /// Additional command line arguments to pass to the browser instance.
87    pub(crate) args: Vec<Arg>,
88
89    /// Whether to disable DEFAULT_ARGS or not, default is false
90    pub(crate) disable_default_args: bool,
91
92    /// Whether to enable request interception
93    pub request_intercept: bool,
94
95    /// Whether to enable cache
96    pub cache_enabled: bool,
97
98    /// Avoid easy bot detection by setting `navigator.webdriver` to false
99    pub(crate) hidden: bool,
100}
101
102#[derive(Debug, Clone)]
103pub struct BrowserConfigBuilder {
104    headless: HeadlessMode,
105    sandbox: bool,
106    window_size: Option<(u32, u32)>,
107    port: u16,
108    executable: Option<PathBuf>,
109    executation_detection: DetectionOptions,
110    extensions: Vec<String>,
111    process_envs: Option<HashMap<String, String>>,
112    user_data_dir: Option<PathBuf>,
113    incognito: bool,
114    launch_timeout: Duration,
115    ignore_https_errors: bool,
116    ignore_invalid_events: bool,
117    disable_https_first: bool,
118    viewport: Option<Viewport>,
119    request_timeout: Duration,
120    args: Vec<Arg>,
121    disable_default_args: bool,
122    request_intercept: bool,
123    cache_enabled: bool,
124    hidden: bool,
125}
126
127impl BrowserConfig {
128    pub fn builder() -> BrowserConfigBuilder {
129        BrowserConfigBuilder::default()
130    }
131
132    pub fn with_executable(path: impl AsRef<Path>) -> Self {
133        Self::builder().chrome_executable(path).build().unwrap()
134    }
135}
136
137impl Default for BrowserConfigBuilder {
138    fn default() -> Self {
139        Self {
140            headless: HeadlessMode::False,
141            sandbox: true,
142            window_size: None,
143            port: 0,
144            executable: None,
145            executation_detection: DetectionOptions::default(),
146            extensions: Vec::new(),
147            process_envs: None,
148            user_data_dir: None,
149            incognito: false,
150            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
151            ignore_https_errors: true,
152            ignore_invalid_events: true,
153            disable_https_first: false,
154            viewport: Some(Default::default()),
155            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
156            args: Vec::new(),
157            disable_default_args: false,
158            request_intercept: false,
159            cache_enabled: true,
160            hidden: true,
161        }
162    }
163}
164
165impl BrowserConfigBuilder {
166    pub fn window_size(mut self, width: u32, height: u32) -> Self {
167        self.window_size = Some((width, height));
168        self
169    }
170
171    pub fn no_sandbox(mut self) -> Self {
172        self.sandbox = false;
173        self
174    }
175
176    pub fn with_head(mut self) -> Self {
177        self.headless = HeadlessMode::False;
178        self
179    }
180
181    pub fn new_headless_mode(mut self) -> Self {
182        self.headless = HeadlessMode::New;
183        self
184    }
185
186    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
187        self.headless = mode;
188        self
189    }
190
191    pub fn incognito(mut self) -> Self {
192        self.incognito = true;
193        self
194    }
195
196    pub fn respect_https_errors(mut self) -> Self {
197        self.ignore_https_errors = false;
198        self
199    }
200
201    /// The browser handler will return [CdpError::InvalidMessage] if a received
202    /// message cannot be parsed.
203    pub fn surface_invalid_messages(mut self) -> Self {
204        self.ignore_invalid_events = false;
205        self
206    }
207
208    pub fn port(mut self, port: u16) -> Self {
209        self.port = port;
210        self
211    }
212
213    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
214        self.launch_timeout = timeout;
215        self
216    }
217
218    pub fn request_timeout(mut self, timeout: Duration) -> Self {
219        self.request_timeout = timeout;
220        self
221    }
222
223    /// Configures the viewport of the browser, which defaults to `800x600`.
224    /// `None` disables viewport emulation (i.e., it uses the browsers default
225    /// configuration, which fills the available space. This is similar to what
226    /// Playwright does when you provide `null` as the value of its `viewport`
227    /// option).
228    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
229        self.viewport = viewport.into();
230        self
231    }
232
233    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
234        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
235        self
236    }
237
238    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
239        self.executable = Some(path.as_ref().to_path_buf());
240        self
241    }
242
243    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
244        self.executation_detection = options;
245        self
246    }
247
248    pub fn extension(mut self, extension: impl Into<String>) -> Self {
249        self.extensions.push(extension.into());
250        self
251    }
252
253    pub fn extensions<I, S>(mut self, extensions: I) -> Self
254    where
255        I: IntoIterator<Item = S>,
256        S: Into<String>,
257    {
258        for ext in extensions {
259            self.extensions.push(ext.into());
260        }
261        self
262    }
263
264    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
265        self.process_envs
266            .get_or_insert(HashMap::new())
267            .insert(key.into(), val.into());
268        self
269    }
270
271    pub fn envs<I, K, V>(mut self, envs: I) -> Self
272    where
273        I: IntoIterator<Item = (K, V)>,
274        K: Into<String>,
275        V: Into<String>,
276    {
277        self.process_envs
278            .get_or_insert(HashMap::new())
279            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
280        self
281    }
282
283    pub fn arg(mut self, arg: impl Into<Arg>) -> Self {
284        self.args.push(arg.into());
285        self
286    }
287
288    pub fn args<I, S>(mut self, args: I) -> Self
289    where
290        I: IntoIterator<Item = S>,
291        S: Into<Arg>,
292    {
293        for arg in args {
294            self.args.push(arg.into());
295        }
296        self
297    }
298
299    pub fn disable_default_args(mut self) -> Self {
300        self.disable_default_args = true;
301        self
302    }
303
304    pub fn disable_https_first(mut self) -> Self {
305        self.disable_https_first = true;
306        self
307    }
308
309    pub fn enable_request_intercept(mut self) -> Self {
310        self.request_intercept = true;
311        self
312    }
313
314    pub fn disable_request_intercept(mut self) -> Self {
315        self.request_intercept = false;
316        self
317    }
318
319    pub fn enable_cache(mut self) -> Self {
320        self.cache_enabled = true;
321        self
322    }
323
324    pub fn disable_cache(mut self) -> Self {
325        self.cache_enabled = false;
326        self
327    }
328
329    pub fn hide(mut self) -> Self {
330        self.hidden = true;
331        self
332    }
333
334    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
335        let executable = if let Some(e) = self.executable {
336            e
337        } else {
338            detection::default_executable(self.executation_detection)?
339        };
340
341        Ok(BrowserConfig {
342            headless: self.headless,
343            sandbox: self.sandbox,
344            window_size: self.window_size,
345            port: self.port,
346            executable,
347            extensions: self.extensions,
348            process_envs: self.process_envs,
349            user_data_dir: self.user_data_dir,
350            incognito: self.incognito,
351            launch_timeout: self.launch_timeout,
352            ignore_https_errors: self.ignore_https_errors,
353            ignore_invalid_messages: self.ignore_invalid_events,
354            disable_https_first: self.disable_https_first,
355            viewport: self.viewport,
356            request_timeout: self.request_timeout,
357            args: self.args,
358            disable_default_args: self.disable_default_args,
359            request_intercept: self.request_intercept,
360            cache_enabled: self.cache_enabled,
361            hidden: self.hidden,
362        })
363    }
364}
365
366impl BrowserConfig {
367    pub fn launch(&self) -> io::Result<Child> {
368        let mut builder = ArgsBuilder::new();
369
370        if self.disable_default_args {
371            builder.args(self.args.clone());
372        } else {
373            builder.args(DEFAULT_ARGS.clone()).args(self.args.clone());
374        }
375
376        if !builder.has("remote-debugging-port") {
377            builder.arg(Arg::value("remote-debugging-port", self.port));
378        }
379
380        if self.extensions.is_empty() {
381            builder.arg(Arg::key("disable-extensions"));
382        } else {
383            builder.args(
384                self.extensions
385                    .iter()
386                    .map(|e| Arg::value("load-extension", e)),
387            );
388        }
389
390        if let Some(ref user_data) = self.user_data_dir {
391            builder.arg(Arg::value("user-data-dir", user_data.display()));
392        } else {
393            // If the user did not specify a data directory, this would default to the systems default
394            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
395            // a temp dir just for chromiumoxide instead.
396            builder.arg(Arg::value(
397                "user-data-dir",
398                std::env::temp_dir().join("chromiumoxide-runner").display(),
399            ));
400        }
401
402        if let Some((width, height)) = self.window_size {
403            builder.arg(Arg::values("window-size", [width, height]));
404        }
405
406        if !self.sandbox {
407            builder.args([Arg::key("no-sandbox"), Arg::key("disable-setuid-sandbox")]);
408        }
409
410        match self.headless {
411            HeadlessMode::False => (),
412            HeadlessMode::True => {
413                builder.args([
414                    Arg::key("headless"),
415                    Arg::key("hide-scrollbars"),
416                    Arg::key("mute-audio"),
417                ]);
418            }
419            HeadlessMode::New => {
420                builder.args([
421                    Arg::value("headless", "new"),
422                    Arg::key("hide-scrollbars"),
423                    Arg::key("mute-audio"),
424                ]);
425            }
426        }
427
428        if self.incognito {
429            builder.arg(Arg::key("incognito"));
430        }
431
432        if self.hidden {
433            builder.arg(Arg::value("disable-blink-features", "AutomationControlled"));
434        }
435
436        if self.disable_https_first {
437            builder.arg(Arg::values(
438                "disable-features",
439                ["HttpsUpgrades", "HttpsFirstBalancedModeAutoEnable"],
440            ));
441        }
442
443        let mut cmd = async_process::Command::new(&self.executable);
444
445        let args = builder.into_iter().collect::<Vec<String>>();
446        cmd.args(args);
447
448        if let Some(ref envs) = self.process_envs {
449            cmd.envs(envs);
450        }
451        cmd.stderr(Stdio::piped()).spawn()
452    }
453}
454
455/// These are passed to the Chrome binary by default.
456/// Via https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157
457static DEFAULT_ARGS: [ArgConst; 24] = [
458    ArgConst::key("disable-background-networking"),
459    ArgConst::values(
460        "enable-features",
461        &["NetworkService", "NetworkServiceInProcess"],
462    ),
463    ArgConst::key("disable-background-timer-throttling"),
464    ArgConst::key("disable-backgrounding-occluded-windows"),
465    ArgConst::key("disable-breakpad"),
466    ArgConst::key("disable-client-side-phishing-detection"),
467    ArgConst::key("disable-component-extensions-with-background-pages"),
468    ArgConst::key("disable-default-apps"),
469    ArgConst::key("disable-dev-shm-usage"),
470    ArgConst::values("disable-features", &["TranslateUI"]),
471    ArgConst::key("disable-hang-monitor"),
472    ArgConst::key("disable-ipc-flooding-protection"),
473    ArgConst::key("disable-popup-blocking"),
474    ArgConst::key("disable-prompt-on-repost"),
475    ArgConst::key("disable-renderer-backgrounding"),
476    ArgConst::key("disable-sync"),
477    ArgConst::values("force-color-profile", &["srgb"]),
478    ArgConst::key("metrics-recording-only"),
479    ArgConst::key("no-first-run"),
480    ArgConst::values("user-agent", &["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"]),
481    ArgConst::values("password-store", &["basic"]),
482    ArgConst::key("use-mock-keychain"),
483    ArgConst::values("enable-blink-features", &["IdleDetection"]),
484    ArgConst::values("lang", &["en_US"]),
485];