viewpoint_core/browser/mod.rs
1//! Browser launching and management.
2//!
3//! This module provides the [`Browser`] type for connecting to and controlling
4//! Chromium-based browsers via the Chrome DevTools Protocol (CDP).
5//!
6//! # Connection Methods
7//!
8//! There are three ways to get a `Browser` instance:
9//!
10//! 1. **Launch a new browser** - [`Browser::launch()`] spawns a new Chromium process
11//! 2. **Connect via WebSocket URL** - [`Browser::connect()`] for direct WebSocket connection
12//! 3. **Connect via HTTP endpoint** - [`Browser::connect_over_cdp()`] discovers WebSocket URL
13//! from an HTTP endpoint like `http://localhost:9222`
14//!
15//! # Example: Launching a Browser
16//!
17//! ```no_run
18//! use viewpoint_core::Browser;
19//!
20//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
21//! let browser = Browser::launch()
22//! .headless(true)
23//! .launch()
24//! .await?;
25//!
26//! let context = browser.new_context().await?;
27//! let page = context.new_page().await?;
28//! page.goto("https://example.com").goto().await?;
29//! # Ok(())
30//! # }
31//! ```
32//!
33//! # Example: Connecting to Existing Browser (MCP-style)
34//!
35//! This is useful for MCP servers or tools that need to connect to an already-running
36//! browser instance:
37//!
38//! ```no_run
39//! use viewpoint_core::Browser;
40//! use std::time::Duration;
41//!
42//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
43//! // Connect via HTTP endpoint (discovers WebSocket URL automatically)
44//! let browser = Browser::connect_over_cdp("http://localhost:9222")
45//! .timeout(Duration::from_secs(10))
46//! .connect()
47//! .await?;
48//!
49//! // Access existing browser contexts (including the default one)
50//! let contexts = browser.contexts().await?;
51//! for context in &contexts {
52//! if context.is_default() {
53//! // The default context has the browser's existing tabs
54//! let pages = context.pages().await?;
55//! println!("Found {} existing pages", pages.len());
56//! }
57//! }
58//!
59//! // You can also create new contexts in the connected browser
60//! let new_context = browser.new_context().await?;
61//! # Ok(())
62//! # }
63//! ```
64//!
65//! # Ownership Model
66//!
67//! Browsers and contexts track ownership:
68//!
69//! - **Launched browsers** (`Browser::launch()`) are "owned" - closing them terminates the process
70//! - **Connected browsers** (`connect()`, `connect_over_cdp()`) are not owned - closing only
71//! disconnects, leaving the browser process running
72//! - **Created contexts** (`new_context()`) are owned - closing disposes them
73//! - **Discovered contexts** (`contexts()`) are not owned - closing only disconnects
74
75mod connector;
76mod context_builder;
77mod launcher;
78mod process;
79
80use std::process::Child;
81use std::sync::Arc;
82use std::time::Duration;
83
84use tempfile::TempDir;
85use tokio::sync::Mutex;
86use tracing::info;
87use viewpoint_cdp::CdpConnection;
88use viewpoint_cdp::protocol::target_domain::{
89 CreateBrowserContextParams, CreateBrowserContextResult, GetBrowserContextsResult,
90};
91
92use crate::context::{BrowserContext, ContextOptions, StorageState, StorageStateSource};
93use crate::error::BrowserError;
94
95pub use connector::ConnectOverCdpBuilder;
96pub use context_builder::NewContextBuilder;
97pub use launcher::{BrowserBuilder, UserDataDir};
98
/// Fallback time limit (30 seconds) for browser operations.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
101
102/// A browser instance connected via CDP.
103///
104/// The `Browser` struct represents a connection to a Chromium-based browser.
105/// It can be obtained by:
106///
107/// - [`Browser::launch()`] - Spawn and connect to a new browser process
108/// - [`Browser::connect()`] - Connect to an existing browser via WebSocket URL
109/// - [`Browser::connect_over_cdp()`] - Connect via HTTP endpoint (auto-discovers WebSocket)
110///
111/// # Key Methods
112///
113/// - [`new_context()`](Self::new_context) - Create a new isolated browser context
114/// - [`contexts()`](Self::contexts) - List all browser contexts (including pre-existing ones)
115/// - [`close()`](Self::close) - Close the browser connection
116///
117/// # Ownership
118///
119/// Use [`is_owned()`](Self::is_owned) to check if this browser was launched by us
120/// (vs connected to an existing process). Owned browsers are terminated when closed.
121///
122/// # User Data Directory
123///
124/// By default, browsers use an isolated temporary directory for user data
125/// (cookies, localStorage, settings). This prevents conflicts when running
126/// multiple browser instances and ensures clean sessions. The temporary
127/// directory is automatically cleaned up when the browser closes or is dropped.
128///
129/// See [`UserDataDir`] for configuration options.
130#[derive(Debug)]
131pub struct Browser {
132 /// CDP connection to the browser.
133 connection: Arc<CdpConnection>,
134 /// Browser process (only present if we launched it).
135 process: Option<Mutex<Child>>,
136 /// Whether the browser was launched by us (vs connected to).
137 owned: bool,
138 /// Temporary user data directory (if using Temp or TempFromTemplate mode).
139 /// Stored here to ensure cleanup on drop.
140 _temp_user_data_dir: Option<TempDir>,
141}
142
143impl Browser {
144 /// Create a browser builder for launching a new browser.
145 ///
146 /// # Example
147 ///
148 /// ```no_run
149 /// use viewpoint_core::Browser;
150 ///
151 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
152 /// let browser = Browser::launch()
153 /// .headless(true)
154 /// .launch()
155 /// .await?;
156 /// # Ok(())
157 /// # }
158 /// ```
159 pub fn launch() -> BrowserBuilder {
160 BrowserBuilder::new()
161 }
162
163 /// Connect to an already-running browser via WebSocket URL.
164 ///
165 /// # Example
166 ///
167 /// ```no_run
168 /// use viewpoint_core::Browser;
169 ///
170 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
171 /// let browser = Browser::connect("ws://localhost:9222/devtools/browser/...").await?;
172 /// # Ok(())
173 /// # }
174 /// ```
175 ///
176 /// # Errors
177 ///
178 /// Returns an error if the connection fails.
179 pub async fn connect(ws_url: &str) -> Result<Self, BrowserError> {
180 let connection = CdpConnection::connect(ws_url).await?;
181
182 // Enable target discovery to receive Target.targetCreated events
183 // This is required for automatic page tracking (popups, target="_blank" links)
184 connection
185 .send_command::<_, serde_json::Value>(
186 "Target.setDiscoverTargets",
187 Some(viewpoint_cdp::protocol::target_domain::SetDiscoverTargetsParams {
188 discover: true,
189 }),
190 None,
191 )
192 .await
193 .map_err(|e| BrowserError::ConnectionFailed(format!("Failed to enable target discovery: {e}")))?;
194
195 Ok(Self {
196 connection: Arc::new(connection),
197 process: None,
198 owned: false,
199 _temp_user_data_dir: None,
200 })
201 }
202
203 /// Connect to an already-running browser via HTTP endpoint or WebSocket URL.
204 ///
205 /// This method supports both:
206 /// - HTTP endpoint URLs (e.g., `http://localhost:9222`) - auto-discovers WebSocket URL
207 /// - WebSocket URLs (e.g., `ws://localhost:9222/devtools/browser/...`) - direct connection
208 ///
209 /// For HTTP endpoints, the method fetches `/json/version` to discover the WebSocket URL,
210 /// similar to Playwright's `connectOverCDP`.
211 ///
212 /// # Example
213 ///
214 /// ```no_run
215 /// use viewpoint_core::Browser;
216 /// use std::time::Duration;
217 ///
218 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
219 /// // Connect via HTTP endpoint (recommended)
220 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
221 /// .connect()
222 /// .await?;
223 ///
224 /// // With custom timeout and headers
225 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
226 /// .timeout(Duration::from_secs(10))
227 /// .header("Authorization", "Bearer token")
228 /// .connect()
229 /// .await?;
230 ///
231 /// // Access existing browser contexts and pages
232 /// let contexts = browser.contexts().await?;
233 /// for context in contexts {
234 /// let pages = context.pages().await?;
235 /// for page in &pages {
236 /// println!("Found page: {}", page.target_id());
237 /// }
238 /// }
239 /// # Ok(())
240 /// # }
241 /// ```
242 pub fn connect_over_cdp(endpoint_url: impl Into<String>) -> ConnectOverCdpBuilder {
243 ConnectOverCdpBuilder::new(endpoint_url)
244 }
245
246 /// Get all browser contexts.
247 ///
248 /// Returns all existing browser contexts, including:
249 /// - Contexts created via `new_context()`
250 /// - The default context (for connected browsers)
251 /// - Any pre-existing contexts (when connecting to an already-running browser)
252 ///
253 /// # Example
254 ///
255 /// ```no_run
256 /// use viewpoint_core::Browser;
257 ///
258 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
259 /// let browser = Browser::connect_over_cdp("http://localhost:9222")
260 /// .connect()
261 /// .await?;
262 ///
263 /// let contexts = browser.contexts().await?;
264 /// println!("Found {} browser contexts", contexts.len());
265 ///
266 /// // The default context (empty string ID) represents the browser's main profile
267 /// for context in &contexts {
268 /// if context.id().is_empty() {
269 /// println!("This is the default context");
270 /// }
271 /// }
272 /// # Ok(())
273 /// # }
274 /// ```
275 ///
276 /// # Errors
277 ///
278 /// Returns an error if querying contexts fails.
279 pub async fn contexts(&self) -> Result<Vec<BrowserContext>, BrowserError> {
280 info!("Getting browser contexts");
281
282 let result: GetBrowserContextsResult = self
283 .connection
284 .send_command("Target.getBrowserContexts", None::<()>, None)
285 .await?;
286
287 let mut contexts = Vec::new();
288
289 // Always include the default context (empty string ID)
290 // The default context represents the browser's main profile
291 contexts.push(BrowserContext::from_existing(
292 self.connection.clone(),
293 String::new(), // Empty string = default context
294 ));
295
296 // Add other contexts
297 for context_id in result.browser_context_ids {
298 if !context_id.is_empty() {
299 contexts.push(BrowserContext::from_existing(
300 self.connection.clone(),
301 context_id,
302 ));
303 }
304 }
305
306 info!(count = contexts.len(), "Found browser contexts");
307
308 Ok(contexts)
309 }
310
311 /// Create a browser from an existing connection and process (legacy, no temp dir).
312 pub(crate) fn from_connection_and_process(connection: CdpConnection, process: Child) -> Self {
313 Self {
314 connection: Arc::new(connection),
315 process: Some(Mutex::new(process)),
316 owned: true,
317 _temp_user_data_dir: None,
318 }
319 }
320
321 /// Create a browser from a launch operation with optional temp directory.
322 pub(crate) fn from_launch(
323 connection: CdpConnection,
324 process: Child,
325 temp_user_data_dir: Option<TempDir>,
326 ) -> Self {
327 Self {
328 connection: Arc::new(connection),
329 process: Some(Mutex::new(process)),
330 owned: true,
331 _temp_user_data_dir: temp_user_data_dir,
332 }
333 }
334
335 /// Create a new isolated browser context.
336 ///
337 /// Browser contexts are isolated environments within the browser,
338 /// similar to incognito windows. They have their own cookies,
339 /// cache, and storage.
340 ///
341 /// # Errors
342 ///
343 /// Returns an error if context creation fails.
344 pub async fn new_context(&self) -> Result<BrowserContext, BrowserError> {
345 let result: CreateBrowserContextResult = self
346 .connection
347 .send_command(
348 "Target.createBrowserContext",
349 Some(CreateBrowserContextParams::default()),
350 None,
351 )
352 .await?;
353
354 Ok(BrowserContext::new(
355 self.connection.clone(),
356 result.browser_context_id,
357 ))
358 }
359
360 /// Create a new context options builder.
361 ///
362 /// Use this to create a browser context with custom configuration.
363 ///
364 /// # Example
365 ///
366 /// ```no_run
367 /// use viewpoint_core::{Browser, Permission};
368 ///
369 /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
370 /// let browser = Browser::launch().headless(true).launch().await?;
371 ///
372 /// let context = browser.new_context_builder()
373 /// .geolocation(37.7749, -122.4194)
374 /// .permissions(vec![Permission::Geolocation])
375 /// .offline(false)
376 /// .build()
377 /// .await?;
378 /// # Ok(())
379 /// # }
380 /// ```
381 pub fn new_context_builder(&self) -> NewContextBuilder<'_> {
382 NewContextBuilder::new(self)
383 }
384
385 /// Create a new isolated browser context with options.
386 ///
387 /// # Errors
388 ///
389 /// Returns an error if context creation fails.
390 pub async fn new_context_with_options(
391 &self,
392 options: ContextOptions,
393 ) -> Result<BrowserContext, BrowserError> {
394 // Load storage state if specified
395 let storage_state = match &options.storage_state {
396 Some(StorageStateSource::Path(path)) => {
397 Some(StorageState::load(path).await.map_err(|e| {
398 BrowserError::LaunchFailed(format!("Failed to load storage state: {e}"))
399 })?)
400 }
401 Some(StorageStateSource::State(state)) => Some(state.clone()),
402 None => None,
403 };
404
405 // Build CDP params with proxy configuration if specified
406 let create_params = match &options.proxy {
407 Some(proxy) => CreateBrowserContextParams {
408 dispose_on_detach: None,
409 proxy_server: Some(proxy.server.clone()),
410 proxy_bypass_list: proxy.bypass.clone(),
411 },
412 None => CreateBrowserContextParams::default(),
413 };
414
415 let result: CreateBrowserContextResult = self
416 .connection
417 .send_command("Target.createBrowserContext", Some(create_params), None)
418 .await?;
419
420 let context = BrowserContext::with_options(
421 self.connection.clone(),
422 result.browser_context_id,
423 options,
424 );
425
426 // Apply options
427 context.apply_options().await?;
428
429 // Restore storage state if any
430 if let Some(state) = storage_state {
431 // Restore cookies
432 context.add_cookies(state.cookies.clone()).await?;
433
434 // Restore localStorage via init script
435 let local_storage_script = state.to_local_storage_init_script();
436 if !local_storage_script.is_empty() {
437 context.add_init_script(&local_storage_script).await?;
438 }
439
440 // Restore IndexedDB via init script
441 let indexed_db_script = state.to_indexed_db_init_script();
442 if !indexed_db_script.is_empty() {
443 context.add_init_script(&indexed_db_script).await?;
444 }
445 }
446
447 Ok(context)
448 }
449
450 /// Close the browser.
451 ///
452 /// If this browser was launched by us, the process will be terminated
453 /// and properly reaped to prevent zombie processes.
454 /// If it was connected to, only the WebSocket connection is closed.
455 ///
456 /// # Errors
457 ///
458 /// Returns an error if closing fails.
459 pub async fn close(&self) -> Result<(), BrowserError> {
460 // If we own the process, terminate it and reap it
461 if let Some(ref process_mutex) = self.process {
462 let mut child = process_mutex.lock().await;
463 process::kill_and_reap_async(&mut child).await;
464 }
465
466 Ok(())
467 }
468
469 /// Get a reference to the CDP connection.
470 pub fn connection(&self) -> &Arc<CdpConnection> {
471 &self.connection
472 }
473
474 /// Check if this browser was launched by us.
475 pub fn is_owned(&self) -> bool {
476 self.owned
477 }
478}
479
480impl Drop for Browser {
481 fn drop(&mut self) {
482 // Try to kill and reap the process if we own it
483 if self.owned {
484 if let Some(ref process_mutex) = self.process {
485 // We can't await in drop, so we try to kill synchronously
486 if let Ok(mut guard) = process_mutex.try_lock() {
487 // Use the sync helper with 10 attempts and 10ms delay between attempts (100ms total)
488 process::kill_and_reap_sync(&mut guard, 10, Duration::from_millis(10));
489 }
490 }
491 }
492 }
493}