Skip to main content

stygian_browser/
browser.rs

1//! Browser instance lifecycle management
2//!
3//! Provides a thin wrapper around a `chromiumoxide` [`Browser`] that adds:
4//!
5//! - Anti-detection launch arguments from [`BrowserConfig`]
6//! - Configurable launch and per-operation timeouts via `tokio::time::timeout`
7//! - Health checks using the CDP `Browser.getVersion` command
8//! - PID-based zombie process detection and forced cleanup
9//! - Graceful shutdown (close all pages ➞ send `Browser.close`)
10//!
11//! # Example
12//!
13//! ```no_run
14//! use stygian_browser::{BrowserConfig, browser::BrowserInstance};
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let config = BrowserConfig::default();
18//! let mut instance = BrowserInstance::launch(config).await?;
19//!
20//! assert!(instance.is_healthy().await);
21//! instance.shutdown().await?;
22//! # Ok(())
23//! # }
24//! ```
25
26use std::time::{Duration, Instant};
27
28use chromiumoxide::Browser;
29use futures::StreamExt;
30use tokio::time::timeout;
31use tracing::{debug, info, warn};
32
33use crate::{
34    BrowserConfig,
35    error::{BrowserError, Result},
36};
37
38// ─── BrowserInstance ──────────────────────────────────────────────────────────
39
40/// A managed browser instance with health tracking.
41///
42/// Wraps a `chromiumoxide` [`Browser`] and an async handler task.  Always call
43/// [`BrowserInstance::shutdown`] (or drop) after use to release OS resources.
44pub struct BrowserInstance {
45    browser: Browser,
46    config: BrowserConfig,
47    launched_at: Instant,
48    /// Set to `false` after a failed health check so callers know to discard.
49    healthy: bool,
50    /// Convenience ID for log correlation.
51    id: String,
52}
53
54impl BrowserInstance {
55    /// Launch a new browser instance using the provided [`BrowserConfig`].
56    ///
57    /// All configured anti-detection arguments (see
58    /// [`BrowserConfig::effective_args`]) are passed at launch time.
59    ///
60    /// # Errors
61    ///
62    /// - [`BrowserError::LaunchFailed`] if the process does not start within
63    ///   `config.launch_timeout`.
64    /// - [`BrowserError::Timeout`] if the browser doesn't respond in time.
65    ///
66    /// # Example
67    ///
68    /// ```no_run
69    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
70    ///
71    /// # async fn run() -> stygian_browser::error::Result<()> {
72    /// let instance = BrowserInstance::launch(BrowserConfig::default()).await?;
73    /// # Ok(())
74    /// # }
75    /// ```
76    pub async fn launch(config: BrowserConfig) -> Result<Self> {
77        let id = ulid::Ulid::new().to_string();
78        let launch_timeout = config.launch_timeout;
79
80        info!(browser_id = %id, "Launching browser");
81
82        let args = config.effective_args();
83        debug!(browser_id = %id, ?args, "Chrome launch arguments");
84
85        let mut builder = chromiumoxide::BrowserConfig::builder();
86
87        // chromiumoxide defaults to headless; call with_head() only for headed mode
88        if !config.headless {
89            builder = builder.with_head();
90        }
91
92        if let Some(path) = &config.chrome_path {
93            builder = builder.chrome_executable(path);
94        }
95
96        if let Some(dir) = &config.user_data_dir {
97            builder = builder.user_data_dir(dir);
98        }
99
100        for arg in &args {
101            // chromiumoxide's ArgsBuilder prepends "--" when formatting args, so
102            // we strip any existing "--" prefix first to avoid "----arg" in Chrome.
103            let stripped = arg.strip_prefix("--").unwrap_or(arg.as_str());
104            builder = builder.arg(stripped);
105        }
106
107        if let Some((w, h)) = config.window_size {
108            builder = builder.window_size(w, h);
109        }
110
111        let cdp_cfg = builder
112            .build()
113            .map_err(|e| BrowserError::LaunchFailed { reason: e })?;
114
115        let (browser, mut handler) = timeout(launch_timeout, Browser::launch(cdp_cfg))
116            .await
117            .map_err(|_| BrowserError::Timeout {
118                operation: "browser.launch".to_string(),
119                duration_ms: u64::try_from(launch_timeout.as_millis()).unwrap_or(u64::MAX),
120            })?
121            .map_err(|e| BrowserError::LaunchFailed {
122                reason: e.to_string(),
123            })?;
124
125        // Spawn the chromiumoxide message handler; it must run for the browser
126        // to remain responsive.
127        tokio::spawn(async move { while handler.next().await.is_some() {} });
128
129        info!(browser_id = %id, "Browser launched successfully");
130
131        Ok(Self {
132            browser,
133            config,
134            launched_at: Instant::now(),
135            healthy: true,
136            id,
137        })
138    }
139
140    // ─── Health ───────────────────────────────────────────────────────────────
141
142    /// Returns `true` if the browser is currently considered healthy.
143    ///
144    /// This is a cached value updated by [`BrowserInstance::health_check`].
145    pub const fn is_healthy_cached(&self) -> bool {
146        self.healthy
147    }
148
149    /// Actively probe the browser with a CDP request.
150    ///
151    /// Sends `Browser.getVersion` and waits up to `cdp_timeout`.  Updates the
152    /// internal healthy flag and returns the result.
153    ///
154    /// # Example
155    ///
156    /// ```no_run
157    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
158    ///
159    /// # async fn run() -> stygian_browser::error::Result<()> {
160    /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
161    /// assert!(instance.is_healthy().await);
162    /// # Ok(())
163    /// # }
164    /// ```
165    pub async fn is_healthy(&mut self) -> bool {
166        match self.health_check().await {
167            Ok(()) => true,
168            Err(e) => {
169                warn!(browser_id = %self.id, error = %e, "Health check failed");
170                false
171            }
172        }
173    }
174
175    /// Run a health check and return a structured [`Result`].
176    ///
177    /// Pings the browser with the CDP `Browser.getVersion` RPC.
178    pub async fn health_check(&mut self) -> Result<()> {
179        let op_timeout = self.config.cdp_timeout;
180
181        timeout(op_timeout, self.browser.version())
182            .await
183            .map_err(|_| {
184                self.healthy = false;
185                BrowserError::Timeout {
186                    operation: "Browser.getVersion".to_string(),
187                    duration_ms: u64::try_from(op_timeout.as_millis()).unwrap_or(u64::MAX),
188                }
189            })?
190            .map_err(|e| {
191                self.healthy = false;
192                BrowserError::CdpError {
193                    operation: "Browser.getVersion".to_string(),
194                    message: e.to_string(),
195                }
196            })?;
197
198        self.healthy = true;
199        Ok(())
200    }
201
202    // ─── Accessors ────────────────────────────────────────────────────────────
203
204    /// Access the underlying `chromiumoxide` [`Browser`].
205    pub const fn browser(&self) -> &Browser {
206        &self.browser
207    }
208
209    /// Mutable access to the underlying `chromiumoxide` [`Browser`].
210    pub const fn browser_mut(&mut self) -> &mut Browser {
211        &mut self.browser
212    }
213
214    /// Instance ID (ULID) for log correlation.
215    pub fn id(&self) -> &str {
216        &self.id
217    }
218
219    /// How long has this instance been alive.
220    pub fn uptime(&self) -> Duration {
221        self.launched_at.elapsed()
222    }
223
224    /// The config snapshot used at launch.
225    pub const fn config(&self) -> &BrowserConfig {
226        &self.config
227    }
228
229    // ─── Shutdown ─────────────────────────────────────────────────────────────
230
231    /// Gracefully close the browser.
232    ///
233    /// Sends `Browser.close` and waits up to `cdp_timeout`.  Any errors during
234    /// tear-down are logged but not propagated so the caller can always clean up.
235    ///
236    /// # Example
237    ///
238    /// ```no_run
239    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
240    ///
241    /// # async fn run() -> stygian_browser::error::Result<()> {
242    /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
243    /// instance.shutdown().await?;
244    /// # Ok(())
245    /// # }
246    /// ```
247    pub async fn shutdown(mut self) -> Result<()> {
248        info!(browser_id = %self.id, "Shutting down browser");
249
250        let op_timeout = self.config.cdp_timeout;
251
252        if let Err(e) = timeout(op_timeout, self.browser.close()).await {
253            // Timeout — log and continue cleanup
254            warn!(
255                browser_id = %self.id,
256                "Browser.close timed out after {}ms: {e}",
257                op_timeout.as_millis()
258            );
259        }
260
261        self.healthy = false;
262        info!(browser_id = %self.id, "Browser shut down");
263        Ok(())
264    }
265
266    /// Open a new tab and return a [`crate::page::PageHandle`].
267    ///
268    /// The handle closes the tab automatically when dropped.
269    ///
270    /// # Errors
271    ///
272    /// Returns [`BrowserError::CdpError`] if a new page cannot be created.
273    ///
274    /// # Example
275    ///
276    /// ```no_run
277    /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
278    ///
279    /// # async fn run() -> stygian_browser::error::Result<()> {
280    /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
281    /// let page = instance.new_page().await?;
282    /// drop(page);
283    /// instance.shutdown().await?;
284    /// # Ok(())
285    /// # }
286    /// ```
287    pub async fn new_page(&self) -> crate::error::Result<crate::page::PageHandle> {
288        use tokio::time::timeout;
289
290        let cdp_timeout = self.config.cdp_timeout;
291
292        let page = timeout(cdp_timeout, self.browser.new_page("about:blank"))
293            .await
294            .map_err(|_| crate::error::BrowserError::Timeout {
295                operation: "Browser.newPage".to_string(),
296                duration_ms: u64::try_from(cdp_timeout.as_millis()).unwrap_or(u64::MAX),
297            })?
298            .map_err(|e| crate::error::BrowserError::CdpError {
299                operation: "Browser.newPage".to_string(),
300                message: e.to_string(),
301            })?;
302
303        // Apply stealth injection scripts for all active stealth levels.
304        #[cfg(feature = "stealth")]
305        crate::stealth::apply_stealth_to_page(&page, &self.config).await?;
306
307        Ok(crate::page::PageHandle::new(page, cdp_timeout))
308    }
309}
310
311// ─── Tests ────────────────────────────────────────────────────────────────────
312
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration must carry the `AutomationControlled`
    /// anti-detection flag.
    ///
    /// Pure configuration test: no Chrome binary is started.
    #[test]
    fn effective_args_contain_automation_flag() {
        let defaults = BrowserConfig::default();
        let args = defaults.effective_args();
        let has_flag = args.iter().any(|a| a.contains("AutomationControlled"));
        assert!(
            has_flag,
            "Expected --disable-blink-features=AutomationControlled in args: {args:?}"
        );
    }

    /// A configured proxy URL shows up somewhere in the launch arguments.
    #[test]
    fn proxy_arg_injected_when_set() {
        let with_proxy = BrowserConfig::builder()
            .proxy("http://proxy.example.com:8080".to_string())
            .build();
        let args = with_proxy.effective_args();
        let has_proxy = args.iter().any(|a| a.contains("proxy.example.com"));
        assert!(has_proxy, "Expected proxy arg in {args:?}");
    }

    /// A configured window size shows up in the launch arguments.
    #[test]
    fn window_size_arg_injected() {
        let sized = BrowserConfig::builder().window_size(1280, 720).build();
        let args = sized.effective_args();
        let has_width = args.iter().any(|a| a.contains("1280"));
        assert!(has_width, "Expected window-size arg in {args:?}");
    }

    /// Compile-time check that instances can cross and be shared between
    /// threads.
    #[test]
    fn browser_instance_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<BrowserInstance>();
    }

    #[test]
    fn no_sandbox_absent_by_default_on_non_linux() {
        // On non-Linux (macOS, Windows) is_containerized() always returns false,
        // so --no-sandbox must NOT appear in the default args unless overridden.
        // On Linux in CI/Docker the STYGIAN_DISABLE_SANDBOX env var or /.dockerenv
        // controls this — the assertion is compiled out there to avoid false
        // failures.
        #[cfg(not(target_os = "linux"))]
        {
            let defaults = BrowserConfig::default();
            let args = defaults.effective_args();
            assert!(args.iter().all(|a| a != "--no-sandbox"));
        }
    }

    /// The default arguments include the `disable-dev-shm-usage` flag.
    #[test]
    fn effective_args_include_disable_dev_shm() {
        let defaults = BrowserConfig::default();
        let args = defaults.effective_args();
        let has_flag = args.iter().any(|a| a.contains("disable-dev-shm-usage"));
        assert!(has_flag);
    }

    /// Without a window size no `--window-size` argument is produced.
    #[test]
    fn no_window_size_arg_when_none() {
        let unsized_cfg = BrowserConfig {
            window_size: None,
            ..BrowserConfig::default()
        };
        let args = unsized_cfg.effective_args();
        assert!(args.iter().all(|a| !a.contains("--window-size")));
    }

    /// Extra arguments supplied through the builder are kept.
    #[test]
    fn custom_arg_appended() {
        let custom = BrowserConfig::builder()
            .arg("--user-agent=MyCustomBot/1.0".to_string())
            .build();
        let args = custom.effective_args();
        let has_custom = args.iter().any(|a| a.contains("MyCustomBot"));
        assert!(has_custom);
    }

    /// A proxy bypass list produces a `proxy-bypass-list` argument.
    #[test]
    fn proxy_bypass_list_arg_injected() {
        let bypassing = BrowserConfig::builder()
            .proxy("http://proxy:8080".to_string())
            .proxy_bypass_list("<local>,localhost".to_string())
            .build();
        let args = bypassing.effective_args();
        let has_bypass = args.iter().any(|a| a.contains("proxy-bypass-list"));
        assert!(has_bypass);
    }

    /// The builder stores the requested headless setting unchanged.
    #[test]
    fn headless_mode_preserved_in_config() {
        let headed = BrowserConfig::builder().headless(false).build();
        assert!(!headed.headless);
        let headless = BrowserConfig::builder().headless(true).build();
        assert!(headless.headless);
    }

    /// The default launch timeout must not be zero.
    #[test]
    fn launch_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert!(!defaults.launch_timeout.is_zero());
    }

    /// The default per-operation CDP timeout must not be zero.
    #[test]
    fn cdp_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert!(!defaults.cdp_timeout.is_zero());
    }
}