viewpoint_core/browser/mod.rs
1//! Browser launching and management.
2//!
3//! This module provides the [`Browser`] type for connecting to and controlling
4//! Chromium-based browsers via the Chrome DevTools Protocol (CDP).
5//!
6//! # Connection Methods
7//!
8//! There are three ways to get a `Browser` instance:
9//!
10//! 1. **Launch a new browser** - [`Browser::launch()`] spawns a new Chromium process
11//! 2. **Connect via WebSocket URL** - [`Browser::connect()`] for direct WebSocket connection
12//! 3. **Connect via HTTP endpoint** - [`Browser::connect_over_cdp()`] discovers WebSocket URL
13//! from an HTTP endpoint like `http://localhost:9222`
14//!
15//! # Example: Launching a Browser
16//!
17//! ```no_run
18//! use viewpoint_core::Browser;
19//!
20//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
21//! let browser = Browser::launch()
22//! .headless(true)
23//! .launch()
24//! .await?;
25//!
26//! let context = browser.new_context().await?;
27//! let page = context.new_page().await?;
28//! page.goto("https://example.com").goto().await?;
29//! # Ok(())
30//! # }
31//! ```
32//!
33//! # Example: Connecting to Existing Browser (MCP-style)
34//!
35//! This is useful for MCP servers or tools that need to connect to an already-running
36//! browser instance:
37//!
38//! ```no_run
39//! use viewpoint_core::Browser;
40//! use std::time::Duration;
41//!
42//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
43//! // Connect via HTTP endpoint (discovers WebSocket URL automatically)
44//! let browser = Browser::connect_over_cdp("http://localhost:9222")
45//! .timeout(Duration::from_secs(10))
46//! .connect()
47//! .await?;
48//!
49//! // Access existing browser contexts (including the default one)
50//! let contexts = browser.contexts().await?;
51//! for context in &contexts {
52//! if context.is_default() {
53//! // The default context has the browser's existing tabs
54//! let pages = context.pages().await?;
55//! println!("Found {} existing pages", pages.len());
56//! }
57//! }
58//!
59//! // You can also create new contexts in the connected browser
60//! let new_context = browser.new_context().await?;
61//! # Ok(())
62//! # }
63//! ```
64//!
65//! # Ownership Model
66//!
67//! Browsers and contexts track ownership:
68//!
69//! - **Launched browsers** (`Browser::launch()`) are "owned" - closing them terminates the process
70//! - **Connected browsers** (`connect()`, `connect_over_cdp()`) are not owned - closing only
71//! disconnects, leaving the browser process running
72//! - **Created contexts** (`new_context()`) are owned - closing disposes them
73//! - **Discovered contexts** (`contexts()`) are not owned - closing only disconnects
74
75mod connector;
76mod context_builder;
77mod launcher;
78
79use std::process::Child;
80use std::sync::Arc;
81use std::thread;
82use std::time::Duration;
83
84use tempfile::TempDir;
85use tokio::sync::Mutex;
86use tracing::{info, warn};
87use viewpoint_cdp::CdpConnection;
88use viewpoint_cdp::protocol::target_domain::{
89 CreateBrowserContextParams, CreateBrowserContextResult, GetBrowserContextsResult,
90};
91
92use crate::context::{
93 BrowserContext, ContextOptions, StorageState, StorageStateSource,
94};
95use crate::error::BrowserError;
96
97pub use connector::ConnectOverCdpBuilder;
98pub use context_builder::NewContextBuilder;
99pub use launcher::{BrowserBuilder, UserDataDir};
100
/// Fallback timeout (30 seconds) for browser operations.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
103
104/// A browser instance connected via CDP.
105///
106/// The `Browser` struct represents a connection to a Chromium-based browser.
107/// It can be obtained by:
108///
109/// - [`Browser::launch()`] - Spawn and connect to a new browser process
110/// - [`Browser::connect()`] - Connect to an existing browser via WebSocket URL
111/// - [`Browser::connect_over_cdp()`] - Connect via HTTP endpoint (auto-discovers WebSocket)
112///
113/// # Key Methods
114///
115/// - [`new_context()`](Self::new_context) - Create a new isolated browser context
116/// - [`contexts()`](Self::contexts) - List all browser contexts (including pre-existing ones)
117/// - [`close()`](Self::close) - Close the browser connection
118///
119/// # Ownership
120///
121/// Use [`is_owned()`](Self::is_owned) to check if this browser was launched by us
122/// (vs connected to an existing process). Owned browsers are terminated when closed.
123///
124/// # User Data Directory
125///
126/// By default, browsers use an isolated temporary directory for user data
127/// (cookies, localStorage, settings). This prevents conflicts when running
128/// multiple browser instances and ensures clean sessions. The temporary
129/// directory is automatically cleaned up when the browser closes or is dropped.
130///
131/// See [`UserDataDir`] for configuration options.
132#[derive(Debug)]
133pub struct Browser {
134 /// CDP connection to the browser.
135 connection: Arc<CdpConnection>,
136 /// Browser process (only present if we launched it).
137 process: Option<Mutex<Child>>,
138 /// Whether the browser was launched by us (vs connected to).
139 owned: bool,
140 /// Temporary user data directory (if using Temp or TempFromTemplate mode).
141 /// Stored here to ensure cleanup on drop.
142 _temp_user_data_dir: Option<TempDir>,
143}
144
145impl Browser {
146 /// Create a browser builder for launching a new browser.
147 ///
148 /// # Example
149 ///
150 /// ```no_run
151 /// use viewpoint_core::Browser;
152 ///
153 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
154 /// let browser = Browser::launch()
155 /// .headless(true)
156 /// .launch()
157 /// .await?;
158 /// # Ok(())
159 /// # }
160 /// ```
161 pub fn launch() -> BrowserBuilder {
162 BrowserBuilder::new()
163 }
164
165 /// Connect to an already-running browser via WebSocket URL.
166 ///
167 /// # Example
168 ///
169 /// ```no_run
170 /// use viewpoint_core::Browser;
171 ///
172 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
173 /// let browser = Browser::connect("ws://localhost:9222/devtools/browser/...").await?;
174 /// # Ok(())
175 /// # }
176 /// ```
177 ///
178 /// # Errors
179 ///
180 /// Returns an error if the connection fails.
181 pub async fn connect(ws_url: &str) -> Result<Self, BrowserError> {
182 let connection = CdpConnection::connect(ws_url).await?;
183
184 Ok(Self {
185 connection: Arc::new(connection),
186 process: None,
187 owned: false,
188 _temp_user_data_dir: None,
189 })
190 }
191
192 /// Connect to an already-running browser via HTTP endpoint or WebSocket URL.
193 ///
194 /// This method supports both:
195 /// - HTTP endpoint URLs (e.g., `http://localhost:9222`) - auto-discovers WebSocket URL
196 /// - WebSocket URLs (e.g., `ws://localhost:9222/devtools/browser/...`) - direct connection
197 ///
198 /// For HTTP endpoints, the method fetches `/json/version` to discover the WebSocket URL,
199 /// similar to Playwright's `connectOverCDP`.
200 ///
201 /// # Example
202 ///
203 /// ```no_run
204 /// use viewpoint_core::Browser;
205 /// use std::time::Duration;
206 ///
207 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
208 /// // Connect via HTTP endpoint (recommended)
209 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
210 /// .connect()
211 /// .await?;
212 ///
213 /// // With custom timeout and headers
214 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
215 /// .timeout(Duration::from_secs(10))
216 /// .header("Authorization", "Bearer token")
217 /// .connect()
218 /// .await?;
219 ///
220 /// // Access existing browser contexts and pages
221 /// let contexts = browser.contexts().await?;
222 /// for context in contexts {
223 /// let pages = context.pages().await?;
224 /// for page in pages {
225 /// println!("Found page: {:?}", page.target_id);
226 /// }
227 /// }
228 /// # Ok(())
229 /// # }
230 /// ```
231 pub fn connect_over_cdp(endpoint_url: impl Into<String>) -> ConnectOverCdpBuilder {
232 ConnectOverCdpBuilder::new(endpoint_url)
233 }
234
235 /// Get all browser contexts.
236 ///
237 /// Returns all existing browser contexts, including:
238 /// - Contexts created via `new_context()`
239 /// - The default context (for connected browsers)
240 /// - Any pre-existing contexts (when connecting to an already-running browser)
241 ///
242 /// # Example
243 ///
244 /// ```no_run
245 /// use viewpoint_core::Browser;
246 ///
247 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
248 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
249 /// .connect()
250 /// .await?;
251 ///
252 /// let contexts = browser.contexts().await?;
253 /// println!("Found {} browser contexts", contexts.len());
254 ///
255 /// // The default context (empty string ID) represents the browser's main profile
256 /// for context in &contexts {
257 /// if context.id().is_empty() {
258 /// println!("This is the default context");
259 /// }
260 /// }
261 /// # Ok(())
262 /// # }
263 /// ```
264 ///
265 /// # Errors
266 ///
267 /// Returns an error if querying contexts fails.
268 pub async fn contexts(&self) -> Result<Vec<BrowserContext>, BrowserError> {
269 info!("Getting browser contexts");
270
271 let result: GetBrowserContextsResult = self
272 .connection
273 .send_command("Target.getBrowserContexts", None::<()>, None)
274 .await?;
275
276 let mut contexts = Vec::new();
277
278 // Always include the default context (empty string ID)
279 // The default context represents the browser's main profile
280 contexts.push(BrowserContext::from_existing(
281 self.connection.clone(),
282 String::new(), // Empty string = default context
283 ));
284
285 // Add other contexts
286 for context_id in result.browser_context_ids {
287 if !context_id.is_empty() {
288 contexts.push(BrowserContext::from_existing(
289 self.connection.clone(),
290 context_id,
291 ));
292 }
293 }
294
295 info!(count = contexts.len(), "Found browser contexts");
296
297 Ok(contexts)
298 }
299
300 /// Create a browser from an existing connection and process (legacy, no temp dir).
301 pub(crate) fn from_connection_and_process(connection: CdpConnection, process: Child) -> Self {
302 Self {
303 connection: Arc::new(connection),
304 process: Some(Mutex::new(process)),
305 owned: true,
306 _temp_user_data_dir: None,
307 }
308 }
309
310 /// Create a browser from a launch operation with optional temp directory.
311 pub(crate) fn from_launch(
312 connection: CdpConnection,
313 process: Child,
314 temp_user_data_dir: Option<TempDir>,
315 ) -> Self {
316 Self {
317 connection: Arc::new(connection),
318 process: Some(Mutex::new(process)),
319 owned: true,
320 _temp_user_data_dir: temp_user_data_dir,
321 }
322 }
323
324 /// Create a new isolated browser context.
325 ///
326 /// Browser contexts are isolated environments within the browser,
327 /// similar to incognito windows. They have their own cookies,
328 /// cache, and storage.
329 ///
330 /// # Errors
331 ///
332 /// Returns an error if context creation fails.
333 pub async fn new_context(&self) -> Result<BrowserContext, BrowserError> {
334 let result: CreateBrowserContextResult = self
335 .connection
336 .send_command(
337 "Target.createBrowserContext",
338 Some(CreateBrowserContextParams::default()),
339 None,
340 )
341 .await?;
342
343 Ok(BrowserContext::new(
344 self.connection.clone(),
345 result.browser_context_id,
346 ))
347 }
348
349 /// Create a new context options builder.
350 ///
351 /// Use this to create a browser context with custom configuration.
352 ///
353 /// # Example
354 ///
355 /// ```no_run
356 /// use viewpoint_core::{Browser, Permission};
357 ///
358 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
359 /// let browser = Browser::launch().headless(true).launch().await?;
360 ///
361 /// let context = browser.new_context_builder()
362 /// .geolocation(37.7749, -122.4194)
363 /// .permissions(vec![Permission::Geolocation])
364 /// .offline(false)
365 /// .build()
366 /// .await?;
367 /// # Ok(())
368 /// # }
369 /// ```
370 pub fn new_context_builder(&self) -> NewContextBuilder<'_> {
371 NewContextBuilder::new(self)
372 }
373
374 /// Create a new isolated browser context with options.
375 ///
376 /// # Errors
377 ///
378 /// Returns an error if context creation fails.
379 pub async fn new_context_with_options(
380 &self,
381 options: ContextOptions,
382 ) -> Result<BrowserContext, BrowserError> {
383 // Load storage state if specified
384 let storage_state = match &options.storage_state {
385 Some(StorageStateSource::Path(path)) => {
386 Some(StorageState::load(path).await.map_err(|e| {
387 BrowserError::LaunchFailed(format!("Failed to load storage state: {e}"))
388 })?)
389 }
390 Some(StorageStateSource::State(state)) => Some(state.clone()),
391 None => None,
392 };
393
394 let result: CreateBrowserContextResult = self
395 .connection
396 .send_command(
397 "Target.createBrowserContext",
398 Some(CreateBrowserContextParams::default()),
399 None,
400 )
401 .await?;
402
403 let context = BrowserContext::with_options(
404 self.connection.clone(),
405 result.browser_context_id,
406 options,
407 );
408
409 // Apply options
410 context.apply_options().await?;
411
412 // Restore storage state if any
413 if let Some(state) = storage_state {
414 // Restore cookies
415 context.add_cookies(state.cookies.clone()).await?;
416
417 // Restore localStorage via init script
418 let local_storage_script = state.to_local_storage_init_script();
419 if !local_storage_script.is_empty() {
420 context.add_init_script(&local_storage_script).await?;
421 }
422
423 // Restore IndexedDB via init script
424 let indexed_db_script = state.to_indexed_db_init_script();
425 if !indexed_db_script.is_empty() {
426 context.add_init_script(&indexed_db_script).await?;
427 }
428 }
429
430 Ok(context)
431 }
432
433 /// Close the browser.
434 ///
435 /// If this browser was launched by us, the process will be terminated
436 /// and properly reaped to prevent zombie processes.
437 /// If it was connected to, only the WebSocket connection is closed.
438 ///
439 /// # Errors
440 ///
441 /// Returns an error if closing fails.
442 pub async fn close(&self) -> Result<(), BrowserError> {
443 // If we own the process, terminate it and reap it
444 if let Some(ref process) = self.process {
445 let mut child = process.lock().await;
446 Self::kill_and_reap_async(&mut child).await;
447 }
448
449 Ok(())
450 }
451
452 /// Kill and reap a child process asynchronously.
453 ///
454 /// This method:
455 /// 1. Sends SIGKILL to the process (if still running)
456 /// 2. Waits for the process to exit and reaps it
457 ///
458 /// This prevents zombie processes by ensuring `wait()` is called.
459 async fn kill_and_reap_async(child: &mut Child) {
460 // Kill the process (ignore errors if already dead)
461 let _ = child.kill();
462
463 // Wait for the process to exit and reap it
464 // This is the critical step to prevent zombie processes
465 match child.wait() {
466 Ok(status) => {
467 info!(?status, "Browser process reaped successfully");
468 }
469 Err(e) => {
470 warn!(error = %e, "Failed to reap browser process");
471 }
472 }
473 }
474
475 /// Kill and reap a child process synchronously (for use in Drop).
476 ///
477 /// This method uses `try_wait()` (non-blocking) with retries since
478 /// `Drop` cannot be async. It attempts to reap the process a few times
479 /// with small delays to handle the case where the process hasn't exited
480 /// immediately after `kill()`.
481 ///
482 /// # Arguments
483 ///
484 /// * `child` - The child process to kill and reap
485 /// * `max_attempts` - Maximum number of try_wait attempts
486 /// * `retry_delay` - Delay between retry attempts
487 fn kill_and_reap_sync(child: &mut Child, max_attempts: u32, retry_delay: Duration) {
488 // Kill the process (ignore errors if already dead)
489 let _ = child.kill();
490
491 // Try to reap the process with retries
492 for attempt in 1..=max_attempts {
493 match child.try_wait() {
494 Ok(Some(status)) => {
495 info!(?status, attempt, "Browser process reaped successfully in Drop");
496 return;
497 }
498 Ok(None) => {
499 // Process still running, wait a bit and retry
500 if attempt < max_attempts {
501 thread::sleep(retry_delay);
502 }
503 }
504 Err(e) => {
505 warn!(error = %e, "Failed to check browser process status in Drop");
506 return;
507 }
508 }
509 }
510
511 // If we get here, the process is still running after all attempts
512 warn!(
513 max_attempts,
514 "Browser process still running after kill, will become zombie until parent exits"
515 );
516 }
517
518 /// Get a reference to the CDP connection.
519 pub fn connection(&self) -> &Arc<CdpConnection> {
520 &self.connection
521 }
522
523 /// Check if this browser was launched by us.
524 pub fn is_owned(&self) -> bool {
525 self.owned
526 }
527}
528
529impl Drop for Browser {
530 fn drop(&mut self) {
531 // Try to kill and reap the process if we own it
532 if self.owned {
533 if let Some(ref process) = self.process {
534 // We can't await in drop, so we try to kill synchronously
535 if let Ok(mut guard) = process.try_lock() {
536 // Use the sync helper with 3 attempts and 1ms delay between attempts
537 Self::kill_and_reap_sync(&mut guard, 3, Duration::from_millis(1));
538 }
539 }
540 }
541 }
542}