viewpoint_core/browser/mod.rs
1//! Browser launching and management.
2//!
3//! This module provides the [`Browser`] type for connecting to and controlling
4//! Chromium-based browsers via the Chrome DevTools Protocol (CDP).
5//!
6//! # Connection Methods
7//!
8//! There are three ways to get a `Browser` instance:
9//!
10//! 1. **Launch a new browser** - [`Browser::launch()`] spawns a new Chromium process
11//! 2. **Connect via WebSocket URL** - [`Browser::connect()`] for direct WebSocket connection
12//! 3. **Connect via HTTP endpoint** - [`Browser::connect_over_cdp()`] discovers WebSocket URL
13//! from an HTTP endpoint like `http://localhost:9222`
14//!
15//! # Example: Launching a Browser
16//!
17//! ```no_run
18//! use viewpoint_core::Browser;
19//!
20//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
21//! let browser = Browser::launch()
22//! .headless(true)
23//! .launch()
24//! .await?;
25//!
26//! let context = browser.new_context().await?;
27//! let page = context.new_page().await?;
28//! page.goto("https://example.com").goto().await?;
29//! # Ok(())
30//! # }
31//! ```
32//!
33//! # Example: Connecting to Existing Browser (MCP-style)
34//!
35//! This is useful for MCP servers or tools that need to connect to an already-running
36//! browser instance:
37//!
38//! ```no_run
39//! use viewpoint_core::Browser;
40//! use std::time::Duration;
41//!
42//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
43//! // Connect via HTTP endpoint (discovers WebSocket URL automatically)
44//! let browser = Browser::connect_over_cdp("http://localhost:9222")
45//! .timeout(Duration::from_secs(10))
46//! .connect()
47//! .await?;
48//!
49//! // Access existing browser contexts (including the default one)
50//! let contexts = browser.contexts().await?;
51//! for context in &contexts {
52//! if context.is_default() {
53//! // The default context has the browser's existing tabs
54//! let pages = context.pages().await?;
55//! println!("Found {} existing pages", pages.len());
56//! }
57//! }
58//!
59//! // You can also create new contexts in the connected browser
60//! let new_context = browser.new_context().await?;
61//! # Ok(())
62//! # }
63//! ```
64//!
65//! # Ownership Model
66//!
67//! Browsers and contexts track ownership:
68//!
69//! - **Launched browsers** (`Browser::launch()`) are "owned" - closing them terminates the process
70//! - **Connected browsers** (`connect()`, `connect_over_cdp()`) are not owned - closing only
71//! disconnects, leaving the browser process running
72//! - **Created contexts** (`new_context()`) are owned - closing disposes them
73//! - **Discovered contexts** (`contexts()`) are not owned - closing only disconnects
74
75mod connector;
76mod context_builder;
77mod launcher;
78
79use std::process::Child;
80use std::sync::Arc;
81use std::thread;
82use std::time::Duration;
83
84use tempfile::TempDir;
85use tokio::sync::Mutex;
86use tracing::{info, warn};
87use viewpoint_cdp::CdpConnection;
88use viewpoint_cdp::protocol::target_domain::{
89 CreateBrowserContextParams, CreateBrowserContextResult, GetBrowserContextsResult,
90};
91
92use crate::context::{BrowserContext, ContextOptions, StorageState, StorageStateSource};
93use crate::error::BrowserError;
94
95pub use connector::ConnectOverCdpBuilder;
96pub use context_builder::NewContextBuilder;
97pub use launcher::{BrowserBuilder, UserDataDir};
98
/// Fallback time limit (30 seconds) for browser operations.
///
/// NOTE(review): nothing in this file's visible code reads this constant; it is
/// presumably consumed by the `connector` / `launcher` submodules — confirm before
/// changing or removing it.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
101
102/// A browser instance connected via CDP.
103///
104/// The `Browser` struct represents a connection to a Chromium-based browser.
105/// It can be obtained by:
106///
107/// - [`Browser::launch()`] - Spawn and connect to a new browser process
108/// - [`Browser::connect()`] - Connect to an existing browser via WebSocket URL
109/// - [`Browser::connect_over_cdp()`] - Connect via HTTP endpoint (auto-discovers WebSocket)
110///
111/// # Key Methods
112///
113/// - [`new_context()`](Self::new_context) - Create a new isolated browser context
114/// - [`contexts()`](Self::contexts) - List all browser contexts (including pre-existing ones)
115/// - [`close()`](Self::close) - Close the browser connection
116///
117/// # Ownership
118///
119/// Use [`is_owned()`](Self::is_owned) to check if this browser was launched by us
120/// (vs connected to an existing process). Owned browsers are terminated when closed.
121///
122/// # User Data Directory
123///
124/// By default, browsers use an isolated temporary directory for user data
125/// (cookies, localStorage, settings). This prevents conflicts when running
126/// multiple browser instances and ensures clean sessions. The temporary
127/// directory is automatically cleaned up when the browser closes or is dropped.
128///
129/// See [`UserDataDir`] for configuration options.
130#[derive(Debug)]
131pub struct Browser {
132 /// CDP connection to the browser.
133 connection: Arc<CdpConnection>,
134 /// Browser process (only present if we launched it).
135 process: Option<Mutex<Child>>,
136 /// Whether the browser was launched by us (vs connected to).
137 owned: bool,
138 /// Temporary user data directory (if using Temp or TempFromTemplate mode).
139 /// Stored here to ensure cleanup on drop.
140 _temp_user_data_dir: Option<TempDir>,
141}
142
143impl Browser {
144 /// Create a browser builder for launching a new browser.
145 ///
146 /// # Example
147 ///
148 /// ```no_run
149 /// use viewpoint_core::Browser;
150 ///
151 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
152 /// let browser = Browser::launch()
153 /// .headless(true)
154 /// .launch()
155 /// .await?;
156 /// # Ok(())
157 /// # }
158 /// ```
159 pub fn launch() -> BrowserBuilder {
160 BrowserBuilder::new()
161 }
162
163 /// Connect to an already-running browser via WebSocket URL.
164 ///
165 /// # Example
166 ///
167 /// ```no_run
168 /// use viewpoint_core::Browser;
169 ///
170 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
171 /// let browser = Browser::connect("ws://localhost:9222/devtools/browser/...").await?;
172 /// # Ok(())
173 /// # }
174 /// ```
175 ///
176 /// # Errors
177 ///
178 /// Returns an error if the connection fails.
179 pub async fn connect(ws_url: &str) -> Result<Self, BrowserError> {
180 let connection = CdpConnection::connect(ws_url).await?;
181
182 Ok(Self {
183 connection: Arc::new(connection),
184 process: None,
185 owned: false,
186 _temp_user_data_dir: None,
187 })
188 }
189
190 /// Connect to an already-running browser via HTTP endpoint or WebSocket URL.
191 ///
192 /// This method supports both:
193 /// - HTTP endpoint URLs (e.g., `http://localhost:9222`) - auto-discovers WebSocket URL
194 /// - WebSocket URLs (e.g., `ws://localhost:9222/devtools/browser/...`) - direct connection
195 ///
196 /// For HTTP endpoints, the method fetches `/json/version` to discover the WebSocket URL,
197 /// similar to Playwright's `connectOverCDP`.
198 ///
199 /// # Example
200 ///
201 /// ```no_run
202 /// use viewpoint_core::Browser;
203 /// use std::time::Duration;
204 ///
205 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
206 /// // Connect via HTTP endpoint (recommended)
207 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
208 /// .connect()
209 /// .await?;
210 ///
211 /// // With custom timeout and headers
212 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
213 /// .timeout(Duration::from_secs(10))
214 /// .header("Authorization", "Bearer token")
215 /// .connect()
216 /// .await?;
217 ///
218 /// // Access existing browser contexts and pages
219 /// let contexts = browser.contexts().await?;
220 /// for context in contexts {
221 /// let pages = context.pages().await?;
222 /// for page in pages {
223 /// println!("Found page: {:?}", page.target_id);
224 /// }
225 /// }
226 /// # Ok(())
227 /// # }
228 /// ```
229 pub fn connect_over_cdp(endpoint_url: impl Into<String>) -> ConnectOverCdpBuilder {
230 ConnectOverCdpBuilder::new(endpoint_url)
231 }
232
233 /// Get all browser contexts.
234 ///
235 /// Returns all existing browser contexts, including:
236 /// - Contexts created via `new_context()`
237 /// - The default context (for connected browsers)
238 /// - Any pre-existing contexts (when connecting to an already-running browser)
239 ///
240 /// # Example
241 ///
242 /// ```no_run
243 /// use viewpoint_core::Browser;
244 ///
245 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
246 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
247 /// .connect()
248 /// .await?;
249 ///
250 /// let contexts = browser.contexts().await?;
251 /// println!("Found {} browser contexts", contexts.len());
252 ///
253 /// // The default context (empty string ID) represents the browser's main profile
254 /// for context in &contexts {
255 /// if context.id().is_empty() {
256 /// println!("This is the default context");
257 /// }
258 /// }
259 /// # Ok(())
260 /// # }
261 /// ```
262 ///
263 /// # Errors
264 ///
265 /// Returns an error if querying contexts fails.
266 pub async fn contexts(&self) -> Result<Vec<BrowserContext>, BrowserError> {
267 info!("Getting browser contexts");
268
269 let result: GetBrowserContextsResult = self
270 .connection
271 .send_command("Target.getBrowserContexts", None::<()>, None)
272 .await?;
273
274 let mut contexts = Vec::new();
275
276 // Always include the default context (empty string ID)
277 // The default context represents the browser's main profile
278 contexts.push(BrowserContext::from_existing(
279 self.connection.clone(),
280 String::new(), // Empty string = default context
281 ));
282
283 // Add other contexts
284 for context_id in result.browser_context_ids {
285 if !context_id.is_empty() {
286 contexts.push(BrowserContext::from_existing(
287 self.connection.clone(),
288 context_id,
289 ));
290 }
291 }
292
293 info!(count = contexts.len(), "Found browser contexts");
294
295 Ok(contexts)
296 }
297
298 /// Create a browser from an existing connection and process (legacy, no temp dir).
299 pub(crate) fn from_connection_and_process(connection: CdpConnection, process: Child) -> Self {
300 Self {
301 connection: Arc::new(connection),
302 process: Some(Mutex::new(process)),
303 owned: true,
304 _temp_user_data_dir: None,
305 }
306 }
307
308 /// Create a browser from a launch operation with optional temp directory.
309 pub(crate) fn from_launch(
310 connection: CdpConnection,
311 process: Child,
312 temp_user_data_dir: Option<TempDir>,
313 ) -> Self {
314 Self {
315 connection: Arc::new(connection),
316 process: Some(Mutex::new(process)),
317 owned: true,
318 _temp_user_data_dir: temp_user_data_dir,
319 }
320 }
321
322 /// Create a new isolated browser context.
323 ///
324 /// Browser contexts are isolated environments within the browser,
325 /// similar to incognito windows. They have their own cookies,
326 /// cache, and storage.
327 ///
328 /// # Errors
329 ///
330 /// Returns an error if context creation fails.
331 pub async fn new_context(&self) -> Result<BrowserContext, BrowserError> {
332 let result: CreateBrowserContextResult = self
333 .connection
334 .send_command(
335 "Target.createBrowserContext",
336 Some(CreateBrowserContextParams::default()),
337 None,
338 )
339 .await?;
340
341 Ok(BrowserContext::new(
342 self.connection.clone(),
343 result.browser_context_id,
344 ))
345 }
346
347 /// Create a new context options builder.
348 ///
349 /// Use this to create a browser context with custom configuration.
350 ///
351 /// # Example
352 ///
353 /// ```no_run
354 /// use viewpoint_core::{Browser, Permission};
355 ///
356 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
357 /// let browser = Browser::launch().headless(true).launch().await?;
358 ///
359 /// let context = browser.new_context_builder()
360 /// .geolocation(37.7749, -122.4194)
361 /// .permissions(vec![Permission::Geolocation])
362 /// .offline(false)
363 /// .build()
364 /// .await?;
365 /// # Ok(())
366 /// # }
367 /// ```
368 pub fn new_context_builder(&self) -> NewContextBuilder<'_> {
369 NewContextBuilder::new(self)
370 }
371
372 /// Create a new isolated browser context with options.
373 ///
374 /// # Errors
375 ///
376 /// Returns an error if context creation fails.
377 pub async fn new_context_with_options(
378 &self,
379 options: ContextOptions,
380 ) -> Result<BrowserContext, BrowserError> {
381 // Load storage state if specified
382 let storage_state = match &options.storage_state {
383 Some(StorageStateSource::Path(path)) => {
384 Some(StorageState::load(path).await.map_err(|e| {
385 BrowserError::LaunchFailed(format!("Failed to load storage state: {e}"))
386 })?)
387 }
388 Some(StorageStateSource::State(state)) => Some(state.clone()),
389 None => None,
390 };
391
392 // Build CDP params with proxy configuration if specified
393 let create_params = match &options.proxy {
394 Some(proxy) => CreateBrowserContextParams {
395 dispose_on_detach: None,
396 proxy_server: Some(proxy.server.clone()),
397 proxy_bypass_list: proxy.bypass.clone(),
398 },
399 None => CreateBrowserContextParams::default(),
400 };
401
402 let result: CreateBrowserContextResult = self
403 .connection
404 .send_command(
405 "Target.createBrowserContext",
406 Some(create_params),
407 None,
408 )
409 .await?;
410
411 let context = BrowserContext::with_options(
412 self.connection.clone(),
413 result.browser_context_id,
414 options,
415 );
416
417 // Apply options
418 context.apply_options().await?;
419
420 // Restore storage state if any
421 if let Some(state) = storage_state {
422 // Restore cookies
423 context.add_cookies(state.cookies.clone()).await?;
424
425 // Restore localStorage via init script
426 let local_storage_script = state.to_local_storage_init_script();
427 if !local_storage_script.is_empty() {
428 context.add_init_script(&local_storage_script).await?;
429 }
430
431 // Restore IndexedDB via init script
432 let indexed_db_script = state.to_indexed_db_init_script();
433 if !indexed_db_script.is_empty() {
434 context.add_init_script(&indexed_db_script).await?;
435 }
436 }
437
438 Ok(context)
439 }
440
441 /// Close the browser.
442 ///
443 /// If this browser was launched by us, the process will be terminated
444 /// and properly reaped to prevent zombie processes.
445 /// If it was connected to, only the WebSocket connection is closed.
446 ///
447 /// # Errors
448 ///
449 /// Returns an error if closing fails.
450 pub async fn close(&self) -> Result<(), BrowserError> {
451 // If we own the process, terminate it and reap it
452 if let Some(ref process) = self.process {
453 let mut child = process.lock().await;
454 Self::kill_and_reap_async(&mut child).await;
455 }
456
457 Ok(())
458 }
459
460 /// Kill and reap a child process asynchronously.
461 ///
462 /// This method:
463 /// 1. Sends SIGKILL to the process (if still running)
464 /// 2. Waits for the process to exit and reaps it
465 ///
466 /// This prevents zombie processes by ensuring `wait()` is called.
467 async fn kill_and_reap_async(child: &mut Child) {
468 // Kill the process (ignore errors if already dead)
469 let _ = child.kill();
470
471 // Wait for the process to exit and reap it
472 // This is the critical step to prevent zombie processes
473 match child.wait() {
474 Ok(status) => {
475 info!(?status, "Browser process reaped successfully");
476 }
477 Err(e) => {
478 warn!(error = %e, "Failed to reap browser process");
479 }
480 }
481 }
482
483 /// Kill and reap a child process synchronously (for use in Drop).
484 ///
485 /// This method uses `try_wait()` (non-blocking) with retries since
486 /// `Drop` cannot be async. It attempts to reap the process a few times
487 /// with small delays to handle the case where the process hasn't exited
488 /// immediately after `kill()`.
489 ///
490 /// # Arguments
491 ///
492 /// * `child` - The child process to kill and reap
493 /// * `max_attempts` - Maximum number of try_wait attempts
494 /// * `retry_delay` - Delay between retry attempts
495 fn kill_and_reap_sync(child: &mut Child, max_attempts: u32, retry_delay: Duration) {
496 // Kill the process (ignore errors if already dead)
497 let _ = child.kill();
498
499 // Try to reap the process with retries
500 for attempt in 1..=max_attempts {
501 match child.try_wait() {
502 Ok(Some(status)) => {
503 info!(
504 ?status,
505 attempt, "Browser process reaped successfully in Drop"
506 );
507 return;
508 }
509 Ok(None) => {
510 // Process still running, wait a bit and retry
511 if attempt < max_attempts {
512 thread::sleep(retry_delay);
513 }
514 }
515 Err(e) => {
516 warn!(error = %e, "Failed to check browser process status in Drop");
517 return;
518 }
519 }
520 }
521
522 // If we get here, the process is still running after all attempts
523 warn!(
524 max_attempts,
525 "Browser process still running after kill, will become zombie until parent exits"
526 );
527 }
528
529 /// Get a reference to the CDP connection.
530 pub fn connection(&self) -> &Arc<CdpConnection> {
531 &self.connection
532 }
533
534 /// Check if this browser was launched by us.
535 pub fn is_owned(&self) -> bool {
536 self.owned
537 }
538}
539
540impl Drop for Browser {
541 fn drop(&mut self) {
542 // Try to kill and reap the process if we own it
543 if self.owned {
544 if let Some(ref process) = self.process {
545 // We can't await in drop, so we try to kill synchronously
546 if let Ok(mut guard) = process.try_lock() {
547 // Use the sync helper with 10 attempts and 10ms delay between attempts (100ms total)
548 Self::kill_and_reap_sync(&mut guard, 10, Duration::from_millis(10));
549 }
550 }
551 }
552 }
553}