viewpoint_core/browser/
mod.rs

1//! Browser launching and management.
2//!
3//! This module provides the [`Browser`] type for connecting to and controlling
4//! Chromium-based browsers via the Chrome DevTools Protocol (CDP).
5//!
6//! # Connection Methods
7//!
8//! There are three ways to get a `Browser` instance:
9//!
10//! 1. **Launch a new browser** - [`Browser::launch()`] spawns a new Chromium process
11//! 2. **Connect via WebSocket URL** - [`Browser::connect()`] for direct WebSocket connection  
12//! 3. **Connect via HTTP endpoint** - [`Browser::connect_over_cdp()`] discovers WebSocket URL
13//!    from an HTTP endpoint like `http://localhost:9222`
14//!
15//! # Example: Launching a Browser
16//!
17//! ```no_run
18//! use viewpoint_core::Browser;
19//!
20//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
21//! let browser = Browser::launch()
22//!     .headless(true)
23//!     .launch()
24//!     .await?;
25//!
26//! let context = browser.new_context().await?;
27//! let page = context.new_page().await?;
28//! page.goto("https://example.com").goto().await?;
29//! # Ok(())
30//! # }
31//! ```
32//!
33//! # Example: Connecting to Existing Browser (MCP-style)
34//!
35//! This is useful for MCP servers or tools that need to connect to an already-running
36//! browser instance:
37//!
38//! ```no_run
39//! use viewpoint_core::Browser;
40//! use std::time::Duration;
41//!
42//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
43//! // Connect via HTTP endpoint (discovers WebSocket URL automatically)
44//! let browser = Browser::connect_over_cdp("http://localhost:9222")
45//!     .timeout(Duration::from_secs(10))
46//!     .connect()
47//!     .await?;
48//!
49//! // Access existing browser contexts (including the default one)
50//! let contexts = browser.contexts().await?;
51//! for context in &contexts {
52//!     if context.is_default() {
53//!         // The default context has the browser's existing tabs
54//!         let pages = context.pages().await?;
55//!         println!("Found {} existing pages", pages.len());
56//!     }
57//! }
58//!
59//! // You can also create new contexts in the connected browser
60//! let new_context = browser.new_context().await?;
61//! # Ok(())
62//! # }
63//! ```
64//!
65//! # Ownership Model
66//!
67//! Browsers and contexts track ownership:
68//!
69//! - **Launched browsers** (`Browser::launch()`) are "owned" - closing them terminates the process
70//! - **Connected browsers** (`connect()`, `connect_over_cdp()`) are not owned - closing only
71//!   disconnects, leaving the browser process running
72//! - **Created contexts** (`new_context()`) are owned - closing disposes them
73//! - **Discovered contexts** (`contexts()`) are not owned - closing only disconnects
74
75mod connector;
76mod context_builder;
77mod launcher;
78mod process;
79
80use std::process::Child;
81use std::sync::Arc;
82use std::time::Duration;
83
84use tempfile::TempDir;
85use tokio::sync::Mutex;
86use tracing::info;
87use viewpoint_cdp::CdpConnection;
88use viewpoint_cdp::protocol::target_domain::{
89    CreateBrowserContextParams, CreateBrowserContextResult, GetBrowserContextsResult,
90};
91
92use crate::context::{BrowserContext, ContextOptions, StorageState, StorageStateSource};
93use crate::error::BrowserError;
94
95pub use connector::ConnectOverCdpBuilder;
96pub use context_builder::NewContextBuilder;
97pub use launcher::{BrowserBuilder, UserDataDir};
98
/// Default timeout (30 seconds) for browser operations.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
101
102/// A browser instance connected via CDP.
103///
104/// The `Browser` struct represents a connection to a Chromium-based browser.
105/// It can be obtained by:
106///
107/// - [`Browser::launch()`] - Spawn and connect to a new browser process
108/// - [`Browser::connect()`] - Connect to an existing browser via WebSocket URL
109/// - [`Browser::connect_over_cdp()`] - Connect via HTTP endpoint (auto-discovers WebSocket)
110///
111/// # Key Methods
112///
113/// - [`new_context()`](Self::new_context) - Create a new isolated browser context
114/// - [`contexts()`](Self::contexts) - List all browser contexts (including pre-existing ones)
115/// - [`close()`](Self::close) - Close the browser connection
116///
117/// # Ownership
118///
119/// Use [`is_owned()`](Self::is_owned) to check if this browser was launched by us
120/// (vs connected to an existing process). Owned browsers are terminated when closed.
121///
122/// # User Data Directory
123///
124/// By default, browsers use an isolated temporary directory for user data
125/// (cookies, localStorage, settings). This prevents conflicts when running
126/// multiple browser instances and ensures clean sessions. The temporary
127/// directory is automatically cleaned up when the browser closes or is dropped.
128///
129/// See [`UserDataDir`] for configuration options.
130#[derive(Debug)]
131pub struct Browser {
132    /// CDP connection to the browser.
133    connection: Arc<CdpConnection>,
134    /// Browser process (only present if we launched it).
135    process: Option<Mutex<Child>>,
136    /// Whether the browser was launched by us (vs connected to).
137    owned: bool,
138    /// Temporary user data directory (if using Temp or TempFromTemplate mode).
139    /// Stored here to ensure cleanup on drop.
140    _temp_user_data_dir: Option<TempDir>,
141}
142
143impl Browser {
144    /// Create a browser builder for launching a new browser.
145    ///
146    /// # Example
147    ///
148    /// ```no_run
149    /// use viewpoint_core::Browser;
150    ///
151    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
152    /// let browser = Browser::launch()
153    ///     .headless(true)
154    ///     .launch()
155    ///     .await?;
156    /// # Ok(())
157    /// # }
158    /// ```
159    pub fn launch() -> BrowserBuilder {
160        BrowserBuilder::new()
161    }
162
163    /// Connect to an already-running browser via WebSocket URL.
164    ///
165    /// # Example
166    ///
167    /// ```no_run
168    /// use viewpoint_core::Browser;
169    ///
170    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
171    /// let browser = Browser::connect("ws://localhost:9222/devtools/browser/...").await?;
172    /// # Ok(())
173    /// # }
174    /// ```
175    ///
176    /// # Errors
177    ///
178    /// Returns an error if the connection fails.
179    pub async fn connect(ws_url: &str) -> Result<Self, BrowserError> {
180        let connection = CdpConnection::connect(ws_url).await?;
181
182        // Enable target discovery to receive Target.targetCreated events
183        // This is required for automatic page tracking (popups, target="_blank" links)
184        connection
185            .send_command::<_, serde_json::Value>(
186                "Target.setDiscoverTargets",
187                Some(
188                    viewpoint_cdp::protocol::target_domain::SetDiscoverTargetsParams {
189                        discover: true,
190                    },
191                ),
192                None,
193            )
194            .await
195            .map_err(|e| {
196                BrowserError::ConnectionFailed(format!("Failed to enable target discovery: {e}"))
197            })?;
198
199        Ok(Self {
200            connection: Arc::new(connection),
201            process: None,
202            owned: false,
203            _temp_user_data_dir: None,
204        })
205    }
206
207    /// Connect to an already-running browser via HTTP endpoint or WebSocket URL.
208    ///
209    /// This method supports both:
210    /// - HTTP endpoint URLs (e.g., `http://localhost:9222`) - auto-discovers WebSocket URL
211    /// - WebSocket URLs (e.g., `ws://localhost:9222/devtools/browser/...`) - direct connection
212    ///
213    /// For HTTP endpoints, the method fetches `/json/version` to discover the WebSocket URL,
214    /// similar to Playwright's `connectOverCDP`.
215    ///
216    /// # Example
217    ///
218    /// ```no_run
219    /// use viewpoint_core::Browser;
220    /// use std::time::Duration;
221    ///
222    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
223    /// // Connect via HTTP endpoint (recommended)
224    /// let browser = Browser::connect_over_cdp("http://localhost:9222")
225    ///     .connect()
226    ///     .await?;
227    ///
228    /// // With custom timeout and headers
229    /// let browser = Browser::connect_over_cdp("http://localhost:9222")
230    ///     .timeout(Duration::from_secs(10))
231    ///     .header("Authorization", "Bearer token")
232    ///     .connect()
233    ///     .await?;
234    ///
235    /// // Access existing browser contexts and pages
236    /// let contexts = browser.contexts().await?;
237    /// for context in contexts {
238    ///     let pages = context.pages().await?;
239    ///     for page in &pages {
240    ///         println!("Found page: {}", page.target_id());
241    ///     }
242    /// }
243    /// # Ok(())
244    /// # }
245    /// ```
246    pub fn connect_over_cdp(endpoint_url: impl Into<String>) -> ConnectOverCdpBuilder {
247        ConnectOverCdpBuilder::new(endpoint_url)
248    }
249
250    /// Get all browser contexts.
251    ///
252    /// Returns all existing browser contexts, including:
253    /// - Contexts created via `new_context()`
254    /// - The default context (for connected browsers)
255    /// - Any pre-existing contexts (when connecting to an already-running browser)
256    ///
257    /// # Example
258    ///
259    /// ```no_run
260    /// use viewpoint_core::Browser;
261    ///
262    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
263    /// let browser = Browser::connect_over_cdp("http://localhost:9222")
264    ///     .connect()
265    ///     .await?;
266    ///
267    /// let contexts = browser.contexts().await?;
268    /// println!("Found {} browser contexts", contexts.len());
269    ///
270    /// // The default context (empty string ID) represents the browser's main profile
271    /// for context in &contexts {
272    ///     if context.id().is_empty() {
273    ///         println!("This is the default context");
274    ///     }
275    /// }
276    /// # Ok(())
277    /// # }
278    /// ```
279    ///
280    /// # Errors
281    ///
282    /// Returns an error if querying contexts fails.
283    pub async fn contexts(&self) -> Result<Vec<BrowserContext>, BrowserError> {
284        info!("Getting browser contexts");
285
286        let result: GetBrowserContextsResult = self
287            .connection
288            .send_command("Target.getBrowserContexts", None::<()>, None)
289            .await?;
290
291        let mut contexts = Vec::new();
292
293        // Always include the default context (empty string ID)
294        // The default context represents the browser's main profile
295        contexts.push(BrowserContext::from_existing(
296            self.connection.clone(),
297            String::new(), // Empty string = default context
298        ));
299
300        // Add other contexts
301        for context_id in result.browser_context_ids {
302            if !context_id.is_empty() {
303                contexts.push(BrowserContext::from_existing(
304                    self.connection.clone(),
305                    context_id,
306                ));
307            }
308        }
309
310        info!(count = contexts.len(), "Found browser contexts");
311
312        Ok(contexts)
313    }
314
315    /// Create a browser from an existing connection and process (legacy, no temp dir).
316    pub(crate) fn from_connection_and_process(connection: CdpConnection, process: Child) -> Self {
317        Self {
318            connection: Arc::new(connection),
319            process: Some(Mutex::new(process)),
320            owned: true,
321            _temp_user_data_dir: None,
322        }
323    }
324
325    /// Create a browser from a launch operation with optional temp directory.
326    pub(crate) fn from_launch(
327        connection: CdpConnection,
328        process: Child,
329        temp_user_data_dir: Option<TempDir>,
330    ) -> Self {
331        Self {
332            connection: Arc::new(connection),
333            process: Some(Mutex::new(process)),
334            owned: true,
335            _temp_user_data_dir: temp_user_data_dir,
336        }
337    }
338
339    /// Create a new isolated browser context.
340    ///
341    /// Browser contexts are isolated environments within the browser,
342    /// similar to incognito windows. They have their own cookies,
343    /// cache, and storage.
344    ///
345    /// # Errors
346    ///
347    /// Returns an error if context creation fails.
348    pub async fn new_context(&self) -> Result<BrowserContext, BrowserError> {
349        let result: CreateBrowserContextResult = self
350            .connection
351            .send_command(
352                "Target.createBrowserContext",
353                Some(CreateBrowserContextParams::default()),
354                None,
355            )
356            .await?;
357
358        Ok(BrowserContext::new(
359            self.connection.clone(),
360            result.browser_context_id,
361        ))
362    }
363
364    /// Create a new context options builder.
365    ///
366    /// Use this to create a browser context with custom configuration.
367    ///
368    /// # Example
369    ///
370    /// ```no_run
371    /// use viewpoint_core::{Browser, Permission};
372    ///
373    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
374    /// let browser = Browser::launch().headless(true).launch().await?;
375    ///
376    /// let context = browser.new_context_builder()
377    ///     .geolocation(37.7749, -122.4194)
378    ///     .permissions(vec![Permission::Geolocation])
379    ///     .offline(false)
380    ///     .build()
381    ///     .await?;
382    /// # Ok(())
383    /// # }
384    /// ```
385    pub fn new_context_builder(&self) -> NewContextBuilder<'_> {
386        NewContextBuilder::new(self)
387    }
388
389    /// Create a new isolated browser context with options.
390    ///
391    /// # Errors
392    ///
393    /// Returns an error if context creation fails.
394    pub async fn new_context_with_options(
395        &self,
396        options: ContextOptions,
397    ) -> Result<BrowserContext, BrowserError> {
398        // Load storage state if specified
399        let storage_state = match &options.storage_state {
400            Some(StorageStateSource::Path(path)) => {
401                Some(StorageState::load(path).await.map_err(|e| {
402                    BrowserError::LaunchFailed(format!("Failed to load storage state: {e}"))
403                })?)
404            }
405            Some(StorageStateSource::State(state)) => Some(state.clone()),
406            None => None,
407        };
408
409        // Build CDP params with proxy configuration if specified
410        let create_params = match &options.proxy {
411            Some(proxy) => CreateBrowserContextParams {
412                dispose_on_detach: None,
413                proxy_server: Some(proxy.server.clone()),
414                proxy_bypass_list: proxy.bypass.clone(),
415            },
416            None => CreateBrowserContextParams::default(),
417        };
418
419        let result: CreateBrowserContextResult = self
420            .connection
421            .send_command("Target.createBrowserContext", Some(create_params), None)
422            .await?;
423
424        let context = BrowserContext::with_options(
425            self.connection.clone(),
426            result.browser_context_id,
427            options,
428        );
429
430        // Apply options
431        context.apply_options().await?;
432
433        // Restore storage state if any
434        if let Some(state) = storage_state {
435            // Restore cookies
436            context.add_cookies(state.cookies.clone()).await?;
437
438            // Restore localStorage via init script
439            let local_storage_script = state.to_local_storage_init_script();
440            if !local_storage_script.is_empty() {
441                context.add_init_script(&local_storage_script).await?;
442            }
443
444            // Restore IndexedDB via init script
445            let indexed_db_script = state.to_indexed_db_init_script();
446            if !indexed_db_script.is_empty() {
447                context.add_init_script(&indexed_db_script).await?;
448            }
449        }
450
451        Ok(context)
452    }
453
454    /// Close the browser.
455    ///
456    /// If this browser was launched by us, the process will be terminated
457    /// and properly reaped to prevent zombie processes.
458    /// If it was connected to, only the WebSocket connection is closed.
459    ///
460    /// # Errors
461    ///
462    /// Returns an error if closing fails.
463    pub async fn close(&self) -> Result<(), BrowserError> {
464        // If we own the process, terminate it and reap it
465        if let Some(ref process_mutex) = self.process {
466            let mut child = process_mutex.lock().await;
467            process::kill_and_reap_async(&mut child).await;
468        }
469
470        Ok(())
471    }
472
473    /// Get a reference to the CDP connection.
474    pub fn connection(&self) -> &Arc<CdpConnection> {
475        &self.connection
476    }
477
478    /// Check if this browser was launched by us.
479    pub fn is_owned(&self) -> bool {
480        self.owned
481    }
482}
483
484impl Drop for Browser {
485    fn drop(&mut self) {
486        // Try to kill and reap the process if we own it
487        if self.owned {
488            if let Some(ref process_mutex) = self.process {
489                // We can't await in drop, so we try to kill synchronously
490                if let Ok(mut guard) = process_mutex.try_lock() {
491                    // Use the sync helper with 10 attempts and 10ms delay between attempts (100ms total)
492                    process::kill_and_reap_sync(&mut guard, 10, Duration::from_millis(10));
493                }
494            }
495        }
496    }
497}