stygian_browser/browser.rs
1//! Browser instance lifecycle management
2//!
3//! Provides a thin wrapper around a `chromiumoxide` [`Browser`] that adds:
4//!
5//! - Anti-detection launch arguments from [`BrowserConfig`]
6//! - Configurable launch and per-operation timeouts via `tokio::time::timeout`
7//! - Health checks using the CDP `Browser.getVersion` command
8//! - PID-based zombie process detection and forced cleanup
9//! - Graceful shutdown (close all pages ➞ send `Browser.close`)
10//!
11//! # Example
12//!
13//! ```no_run
14//! use stygian_browser::{BrowserConfig, browser::BrowserInstance};
15//!
16//! # async fn run() -> stygian_browser::error::Result<()> {
17//! let config = BrowserConfig::default();
18//! let mut instance = BrowserInstance::launch(config).await?;
19//!
20//! assert!(instance.is_healthy().await);
21//! instance.shutdown().await?;
22//! # Ok(())
23//! # }
24//! ```
25
26use std::time::{Duration, Instant};
27
28use chromiumoxide::Browser;
29use futures::StreamExt;
30use tokio::time::timeout;
31use tracing::{debug, info, warn};
32
33use crate::{
34 BrowserConfig,
35 error::{BrowserError, Result},
36};
37
38// ─── BrowserInstance ──────────────────────────────────────────────────────────
39
40/// A managed browser instance with health tracking.
41///
42/// Wraps a `chromiumoxide` [`Browser`] and an async handler task. Always call
43/// [`BrowserInstance::shutdown`] (or drop) after use to release OS resources.
44pub struct BrowserInstance {
45 browser: Browser,
46 config: BrowserConfig,
47 launched_at: Instant,
48 /// Set to `false` after a failed health check so callers know to discard.
49 healthy: bool,
50 /// Convenience ID for log correlation.
51 id: String,
52}
53
54impl BrowserInstance {
55 /// Launch a new browser instance using the provided [`BrowserConfig`].
56 ///
57 /// All configured anti-detection arguments (see
58 /// [`BrowserConfig::effective_args`]) are passed at launch time.
59 ///
60 /// # Errors
61 ///
62 /// - [`BrowserError::LaunchFailed`] if the process does not start within
63 /// `config.launch_timeout`.
64 /// - [`BrowserError::Timeout`] if the browser doesn't respond in time.
65 ///
66 /// # Example
67 ///
68 /// ```no_run
69 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
70 ///
71 /// # async fn run() -> stygian_browser::error::Result<()> {
72 /// let instance = BrowserInstance::launch(BrowserConfig::default()).await?;
73 /// # Ok(())
74 /// # }
75 /// ```
76 pub async fn launch(config: BrowserConfig) -> Result<Self> {
77 let id = ulid::Ulid::new().to_string();
78 let launch_timeout = config.launch_timeout;
79
80 info!(browser_id = %id, "Launching browser");
81
82 let args = config.effective_args();
83 debug!(browser_id = %id, ?args, "Chrome launch arguments");
84
85 let mut builder = chromiumoxide::BrowserConfig::builder();
86
87 // chromiumoxide defaults to headless; call with_head() only for headed mode
88 if !config.headless {
89 builder = builder.with_head();
90 }
91
92 if let Some(path) = &config.chrome_path {
93 builder = builder.chrome_executable(path);
94 }
95
96 if let Some(dir) = &config.user_data_dir {
97 builder = builder.user_data_dir(dir);
98 }
99
100 for arg in &args {
101 // chromiumoxide's ArgsBuilder prepends "--" when formatting args, so
102 // we strip any existing "--" prefix first to avoid "----arg" in Chrome.
103 let stripped = arg.strip_prefix("--").unwrap_or(arg.as_str());
104 builder = builder.arg(stripped);
105 }
106
107 if let Some((w, h)) = config.window_size {
108 builder = builder.window_size(w, h);
109 }
110
111 let cdp_cfg = builder
112 .build()
113 .map_err(|e| BrowserError::LaunchFailed { reason: e })?;
114
115 let (browser, mut handler) = timeout(launch_timeout, Browser::launch(cdp_cfg))
116 .await
117 .map_err(|_| BrowserError::Timeout {
118 operation: "browser.launch".to_string(),
119 duration_ms: u64::try_from(launch_timeout.as_millis()).unwrap_or(u64::MAX),
120 })?
121 .map_err(|e| BrowserError::LaunchFailed {
122 reason: e.to_string(),
123 })?;
124
125 // Spawn the chromiumoxide message handler; it must run for the browser
126 // to remain responsive.
127 tokio::spawn(async move { while handler.next().await.is_some() {} });
128
129 info!(browser_id = %id, "Browser launched successfully");
130
131 Ok(Self {
132 browser,
133 config,
134 launched_at: Instant::now(),
135 healthy: true,
136 id,
137 })
138 }
139
140 // ─── Health ───────────────────────────────────────────────────────────────
141
142 /// Returns `true` if the browser is currently considered healthy.
143 ///
144 /// This is a cached value updated by [`BrowserInstance::health_check`].
145 pub const fn is_healthy_cached(&self) -> bool {
146 self.healthy
147 }
148
149 /// Actively probe the browser with a CDP request.
150 ///
151 /// Sends `Browser.getVersion` and waits up to `cdp_timeout`. Updates the
152 /// internal healthy flag and returns the result.
153 ///
154 /// # Example
155 ///
156 /// ```no_run
157 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
158 ///
159 /// # async fn run() -> stygian_browser::error::Result<()> {
160 /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
161 /// assert!(instance.is_healthy().await);
162 /// # Ok(())
163 /// # }
164 /// ```
165 pub async fn is_healthy(&mut self) -> bool {
166 match self.health_check().await {
167 Ok(()) => true,
168 Err(e) => {
169 warn!(browser_id = %self.id, error = %e, "Health check failed");
170 false
171 }
172 }
173 }
174
175 /// Run a health check and return a structured [`Result`].
176 ///
177 /// Pings the browser with the CDP `Browser.getVersion` RPC.
178 pub async fn health_check(&mut self) -> Result<()> {
179 let op_timeout = self.config.cdp_timeout;
180
181 timeout(op_timeout, self.browser.version())
182 .await
183 .map_err(|_| {
184 self.healthy = false;
185 BrowserError::Timeout {
186 operation: "Browser.getVersion".to_string(),
187 duration_ms: u64::try_from(op_timeout.as_millis()).unwrap_or(u64::MAX),
188 }
189 })?
190 .map_err(|e| {
191 self.healthy = false;
192 BrowserError::CdpError {
193 operation: "Browser.getVersion".to_string(),
194 message: e.to_string(),
195 }
196 })?;
197
198 self.healthy = true;
199 Ok(())
200 }
201
202 // ─── Accessors ────────────────────────────────────────────────────────────
203
204 /// Access the underlying `chromiumoxide` [`Browser`].
205 pub const fn browser(&self) -> &Browser {
206 &self.browser
207 }
208
209 /// Mutable access to the underlying `chromiumoxide` [`Browser`].
210 pub const fn browser_mut(&mut self) -> &mut Browser {
211 &mut self.browser
212 }
213
214 /// Instance ID (ULID) for log correlation.
215 pub fn id(&self) -> &str {
216 &self.id
217 }
218
219 /// How long has this instance been alive.
220 pub fn uptime(&self) -> Duration {
221 self.launched_at.elapsed()
222 }
223
224 /// The config snapshot used at launch.
225 pub const fn config(&self) -> &BrowserConfig {
226 &self.config
227 }
228
229 // ─── Shutdown ─────────────────────────────────────────────────────────────
230
231 /// Gracefully close the browser.
232 ///
233 /// Sends `Browser.close` and waits up to `cdp_timeout`. Any errors during
234 /// tear-down are logged but not propagated so the caller can always clean up.
235 ///
236 /// # Example
237 ///
238 /// ```no_run
239 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
240 ///
241 /// # async fn run() -> stygian_browser::error::Result<()> {
242 /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
243 /// instance.shutdown().await?;
244 /// # Ok(())
245 /// # }
246 /// ```
247 pub async fn shutdown(mut self) -> Result<()> {
248 info!(browser_id = %self.id, "Shutting down browser");
249
250 let op_timeout = self.config.cdp_timeout;
251
252 if let Err(e) = timeout(op_timeout, self.browser.close()).await {
253 // Timeout — log and continue cleanup
254 warn!(
255 browser_id = %self.id,
256 "Browser.close timed out after {}ms: {e}",
257 op_timeout.as_millis()
258 );
259 }
260
261 self.healthy = false;
262 info!(browser_id = %self.id, "Browser shut down");
263 Ok(())
264 }
265
266 /// Open a new tab and return a [`crate::page::PageHandle`].
267 ///
268 /// The handle closes the tab automatically when dropped.
269 ///
270 /// # Errors
271 ///
272 /// Returns [`BrowserError::CdpError`] if a new page cannot be created.
273 ///
274 /// # Example
275 ///
276 /// ```no_run
277 /// use stygian_browser::{BrowserConfig, browser::BrowserInstance};
278 ///
279 /// # async fn run() -> stygian_browser::error::Result<()> {
280 /// let mut instance = BrowserInstance::launch(BrowserConfig::default()).await?;
281 /// let page = instance.new_page().await?;
282 /// drop(page);
283 /// instance.shutdown().await?;
284 /// # Ok(())
285 /// # }
286 /// ```
287 pub async fn new_page(&self) -> crate::error::Result<crate::page::PageHandle> {
288 use tokio::time::timeout;
289
290 let cdp_timeout = self.config.cdp_timeout;
291
292 let page = timeout(cdp_timeout, self.browser.new_page("about:blank"))
293 .await
294 .map_err(|_| crate::error::BrowserError::Timeout {
295 operation: "Browser.newPage".to_string(),
296 duration_ms: u64::try_from(cdp_timeout.as_millis()).unwrap_or(u64::MAX),
297 })?
298 .map_err(|e| crate::error::BrowserError::CdpError {
299 operation: "Browser.newPage".to_string(),
300 message: e.to_string(),
301 })?;
302
303 // Apply stealth injection scripts for all active stealth levels.
304 #[cfg(feature = "stealth")]
305 crate::stealth::apply_stealth_to_page(&page, &self.config).await?;
306
307 Ok(crate::page::PageHandle::new(page, cdp_timeout))
308 }
309}
310
311// ─── Tests ────────────────────────────────────────────────────────────────────
312
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when any effective launch argument of `cfg` contains `needle`.
    fn any_arg_contains(cfg: &BrowserConfig, needle: &str) -> bool {
        cfg.effective_args().iter().any(|arg| arg.contains(needle))
    }

    /// The default configuration must carry the anti-detection flag.
    ///
    /// Pure configuration test; no Chrome binary is needed.
    #[test]
    fn effective_args_contain_automation_flag() {
        let defaults = BrowserConfig::default();
        let args = defaults.effective_args();
        let found = args.iter().any(|flag| flag.contains("AutomationControlled"));
        assert!(
            found,
            "Expected --disable-blink-features=AutomationControlled in args: {args:?}"
        );
    }

    /// A configured proxy shows up among the launch arguments.
    #[test]
    fn proxy_arg_injected_when_set() {
        let proxied = BrowserConfig::builder()
            .proxy("http://proxy.example.com:8080".to_string())
            .build();
        let args = proxied.effective_args();
        let found = args.iter().any(|flag| flag.contains("proxy.example.com"));
        assert!(found, "Expected proxy arg in {args:?}");
    }

    /// A configured window size shows up among the launch arguments.
    #[test]
    fn window_size_arg_injected() {
        let sized = BrowserConfig::builder().window_size(1280, 720).build();
        let args = sized.effective_args();
        let found = args.iter().any(|flag| flag.contains("1280"));
        assert!(found, "Expected window-size arg in {args:?}");
    }

    /// Compile-time check that the instance can cross and be shared between threads.
    #[test]
    fn browser_instance_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<BrowserInstance>();
    }

    #[test]
    fn no_sandbox_absent_by_default_on_non_linux() {
        // Outside Linux (macOS, Windows) is_containerized() always returns
        // false, so --no-sandbox must NOT be part of the default args unless
        // overridden. On Linux in CI/Docker the STYGIAN_DISABLE_SANDBOX env var
        // or /.dockerenv decides, so the assertion is skipped there to avoid
        // false failures.
        #[cfg(not(target_os = "linux"))]
        {
            let defaults = BrowserConfig::default();
            let sandbox_disabled = defaults
                .effective_args()
                .iter()
                .any(|flag| flag == "--no-sandbox");
            assert!(!sandbox_disabled);
        }
    }

    /// The default arguments disable `/dev/shm` usage.
    #[test]
    fn effective_args_include_disable_dev_shm() {
        assert!(any_arg_contains(
            &BrowserConfig::default(),
            "disable-dev-shm-usage"
        ));
    }

    /// Without a window size no `--window-size` argument is emitted.
    #[test]
    fn no_window_size_arg_when_none() {
        let unsized_cfg = BrowserConfig {
            window_size: None,
            ..Default::default()
        };
        assert!(!any_arg_contains(&unsized_cfg, "--window-size"));
    }

    /// Arguments added through the builder reach the effective argument list.
    #[test]
    fn custom_arg_appended() {
        let custom = BrowserConfig::builder()
            .arg("--user-agent=MyCustomBot/1.0".to_string())
            .build();
        assert!(any_arg_contains(&custom, "MyCustomBot"));
    }

    /// A proxy bypass list shows up among the launch arguments.
    #[test]
    fn proxy_bypass_list_arg_injected() {
        let bypassing = BrowserConfig::builder()
            .proxy("http://proxy:8080".to_string())
            .proxy_bypass_list("<local>,localhost".to_string())
            .build();
        assert!(any_arg_contains(&bypassing, "proxy-bypass-list"));
    }

    /// The builder stores the requested headless mode unchanged.
    #[test]
    fn headless_mode_preserved_in_config() {
        for wanted in [false, true] {
            let built = BrowserConfig::builder().headless(wanted).build();
            assert_eq!(built.headless, wanted);
        }
    }

    /// The default launch timeout is a positive duration.
    #[test]
    fn launch_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert_ne!(defaults.launch_timeout, Duration::ZERO);
    }

    /// The default CDP timeout is a positive duration.
    #[test]
    fn cdp_timeout_default_is_non_zero() {
        let defaults = BrowserConfig::default();
        assert_ne!(defaults.cdp_timeout, Duration::ZERO);
    }
}