viewpoint_core/browser/
mod.rs

1//! Browser launching and management.
2//!
3//! This module provides the [`Browser`] type for connecting to and controlling
4//! Chromium-based browsers via the Chrome DevTools Protocol (CDP).
5//!
6//! # Connection Methods
7//!
8//! There are three ways to get a `Browser` instance:
9//!
10//! 1. **Launch a new browser** - [`Browser::launch()`] spawns a new Chromium process
11//! 2. **Connect via WebSocket URL** - [`Browser::connect()`] for direct WebSocket connection  
12//! 3. **Connect via HTTP endpoint** - [`Browser::connect_over_cdp()`] discovers WebSocket URL
13//!    from an HTTP endpoint like `http://localhost:9222`
14//!
15//! # Example: Launching a Browser
16//!
17//! ```no_run
18//! use viewpoint_core::Browser;
19//!
20//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
21//! let browser = Browser::launch()
22//!     .headless(true)
23//!     .launch()
24//!     .await?;
25//!
26//! let context = browser.new_context().await?;
27//! let page = context.new_page().await?;
28//! page.goto("https://example.com").goto().await?;
29//! # Ok(())
30//! # }
31//! ```
32//!
33//! # Example: Connecting to Existing Browser (MCP-style)
34//!
35//! This is useful for MCP servers or tools that need to connect to an already-running
36//! browser instance:
37//!
38//! ```no_run
39//! use viewpoint_core::Browser;
40//! use std::time::Duration;
41//!
42//! # async fn example() -> Result<(), viewpoint_core::CoreError> {
43//! // Connect via HTTP endpoint (discovers WebSocket URL automatically)
44//! let browser = Browser::connect_over_cdp("http://localhost:9222")
45//!     .timeout(Duration::from_secs(10))
46//!     .connect()
47//!     .await?;
48//!
49//! // Access existing browser contexts (including the default one)
50//! let contexts = browser.contexts().await?;
51//! for context in &contexts {
52//!     if context.is_default() {
53//!         // The default context has the browser's existing tabs
54//!         let pages = context.pages().await?;
55//!         println!("Found {} existing pages", pages.len());
56//!     }
57//! }
58//!
59//! // You can also create new contexts in the connected browser
60//! let new_context = browser.new_context().await?;
61//! # Ok(())
62//! # }
63//! ```
64//!
65//! # Ownership Model
66//!
67//! Browsers and contexts track ownership:
68//!
69//! - **Launched browsers** (`Browser::launch()`) are "owned" - closing them terminates the process
70//! - **Connected browsers** (`connect()`, `connect_over_cdp()`) are not owned - closing only
71//!   disconnects, leaving the browser process running
72//! - **Created contexts** (`new_context()`) are owned - closing disposes them
73//! - **Discovered contexts** (`contexts()`) are not owned - closing only disconnects
74
75mod connector;
76mod context_builder;
77mod launcher;
78mod process;
79
80use std::process::Child;
81use std::sync::Arc;
82use std::time::Duration;
83
84use tempfile::TempDir;
85use tokio::sync::Mutex;
86use tracing::info;
87use viewpoint_cdp::CdpConnection;
88use viewpoint_cdp::protocol::target_domain::{
89    CreateBrowserContextParams, CreateBrowserContextResult, GetBrowserContextsResult,
90};
91
92use crate::context::{BrowserContext, ContextOptions, StorageState, StorageStateSource};
93use crate::error::BrowserError;
94
95pub use connector::ConnectOverCdpBuilder;
96pub use context_builder::NewContextBuilder;
97pub use launcher::{BrowserBuilder, UserDataDir};
98
/// Default time limit (30 seconds) for browser operations.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
101
102/// A browser instance connected via CDP.
103///
104/// The `Browser` struct represents a connection to a Chromium-based browser.
105/// It can be obtained by:
106///
107/// - [`Browser::launch()`] - Spawn and connect to a new browser process
108/// - [`Browser::connect()`] - Connect to an existing browser via WebSocket URL
109/// - [`Browser::connect_over_cdp()`] - Connect via HTTP endpoint (auto-discovers WebSocket)
110///
111/// # Key Methods
112///
113/// - [`new_context()`](Self::new_context) - Create a new isolated browser context
114/// - [`contexts()`](Self::contexts) - List all browser contexts (including pre-existing ones)
115/// - [`close()`](Self::close) - Close the browser connection
116///
117/// # Ownership
118///
119/// Use [`is_owned()`](Self::is_owned) to check if this browser was launched by us
120/// (vs connected to an existing process). Owned browsers are terminated when closed.
121///
122/// # User Data Directory
123///
124/// By default, browsers use an isolated temporary directory for user data
125/// (cookies, localStorage, settings). This prevents conflicts when running
126/// multiple browser instances and ensures clean sessions. The temporary
127/// directory is automatically cleaned up when the browser closes or is dropped.
128///
129/// See [`UserDataDir`] for configuration options.
130#[derive(Debug)]
131pub struct Browser {
132    /// CDP connection to the browser.
133    connection: Arc<CdpConnection>,
134    /// Browser process (only present if we launched it).
135    process: Option<Mutex<Child>>,
136    /// Whether the browser was launched by us (vs connected to).
137    owned: bool,
138    /// Temporary user data directory (if using Temp or TempFromTemplate mode).
139    /// Stored here to ensure cleanup on drop.
140    _temp_user_data_dir: Option<TempDir>,
141}
142
143impl Browser {
144    /// Create a browser builder for launching a new browser.
145    ///
146    /// # Example
147    ///
148    /// ```no_run
149    /// use viewpoint_core::Browser;
150    ///
151    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
152    /// let browser = Browser::launch()
153    ///     .headless(true)
154    ///     .launch()
155    ///     .await?;
156    /// # Ok(())
157    /// # }
158    /// ```
159    pub fn launch() -> BrowserBuilder {
160        BrowserBuilder::new()
161    }
162
163    /// Connect to an already-running browser via WebSocket URL.
164    ///
165    /// # Example
166    ///
167    /// ```no_run
168    /// use viewpoint_core::Browser;
169    ///
170    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
171    /// let browser = Browser::connect("ws://localhost:9222/devtools/browser/...").await?;
172    /// # Ok(())
173    /// # }
174    /// ```
175    ///
176    /// # Errors
177    ///
178    /// Returns an error if the connection fails.
179    pub async fn connect(ws_url: &str) -> Result<Self, BrowserError> {
180        let connection = CdpConnection::connect(ws_url).await?;
181
182        // Enable target discovery to receive Target.targetCreated events
183        // This is required for automatic page tracking (popups, target="_blank" links)
184        connection
185            .send_command::<_, serde_json::Value>(
186                "Target.setDiscoverTargets",
187                Some(viewpoint_cdp::protocol::target_domain::SetDiscoverTargetsParams {
188                    discover: true,
189                }),
190                None,
191            )
192            .await
193            .map_err(|e| BrowserError::ConnectionFailed(format!("Failed to enable target discovery: {e}")))?;
194
195        Ok(Self {
196            connection: Arc::new(connection),
197            process: None,
198            owned: false,
199            _temp_user_data_dir: None,
200        })
201    }
202
203    /// Connect to an already-running browser via HTTP endpoint or WebSocket URL.
204    ///
205    /// This method supports both:
206    /// - HTTP endpoint URLs (e.g., `http://localhost:9222`) - auto-discovers WebSocket URL
207    /// - WebSocket URLs (e.g., `ws://localhost:9222/devtools/browser/...`) - direct connection
208    ///
209    /// For HTTP endpoints, the method fetches `/json/version` to discover the WebSocket URL,
210    /// similar to Playwright's `connectOverCDP`.
211    ///
212    /// # Example
213    ///
214    /// ```no_run
215    /// use viewpoint_core::Browser;
216    /// use std::time::Duration;
217    ///
218    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
219    /// // Connect via HTTP endpoint (recommended)
220    /// let browser = Browser::connect_over_cdp("http://localhost:9222")
221    ///     .connect()
222    ///     .await?;
223    ///
224    /// // With custom timeout and headers
225    /// let browser = Browser::connect_over_cdp("http://localhost:9222")
226    ///     .timeout(Duration::from_secs(10))
227    ///     .header("Authorization", "Bearer token")
228    ///     .connect()
229    ///     .await?;
230    ///
231    /// // Access existing browser contexts and pages
232    /// let contexts = browser.contexts().await?;
233    /// for context in contexts {
234    ///     let pages = context.pages().await?;
235    ///     for page in &pages {
236    ///         println!("Found page: {}", page.target_id());
237    ///     }
238    /// }
239    /// # Ok(())
240    /// # }
241    /// ```
242    pub fn connect_over_cdp(endpoint_url: impl Into<String>) -> ConnectOverCdpBuilder {
243        ConnectOverCdpBuilder::new(endpoint_url)
244    }
245
246    /// Get all browser contexts.
247    ///
248    /// Returns all existing browser contexts, including:
249    /// - Contexts created via `new_context()`
250    /// - The default context (for connected browsers)
251    /// - Any pre-existing contexts (when connecting to an already-running browser)
252    ///
253    /// # Example
254    ///
255    /// ```no_run
256    /// use viewpoint_core::Browser;
257    ///
258    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
259    /// let browser = Browser::connect_over_cdp("http://localhost:9222")
260    ///     .connect()
261    ///     .await?;
262    ///
263    /// let contexts = browser.contexts().await?;
264    /// println!("Found {} browser contexts", contexts.len());
265    ///
266    /// // The default context (empty string ID) represents the browser's main profile
267    /// for context in &contexts {
268    ///     if context.id().is_empty() {
269    ///         println!("This is the default context");
270    ///     }
271    /// }
272    /// # Ok(())
273    /// # }
274    /// ```
275    ///
276    /// # Errors
277    ///
278    /// Returns an error if querying contexts fails.
279    pub async fn contexts(&self) -> Result<Vec<BrowserContext>, BrowserError> {
280        info!("Getting browser contexts");
281
282        let result: GetBrowserContextsResult = self
283            .connection
284            .send_command("Target.getBrowserContexts", None::<()>, None)
285            .await?;
286
287        let mut contexts = Vec::new();
288
289        // Always include the default context (empty string ID)
290        // The default context represents the browser's main profile
291        contexts.push(BrowserContext::from_existing(
292            self.connection.clone(),
293            String::new(), // Empty string = default context
294        ));
295
296        // Add other contexts
297        for context_id in result.browser_context_ids {
298            if !context_id.is_empty() {
299                contexts.push(BrowserContext::from_existing(
300                    self.connection.clone(),
301                    context_id,
302                ));
303            }
304        }
305
306        info!(count = contexts.len(), "Found browser contexts");
307
308        Ok(contexts)
309    }
310
311    /// Create a browser from an existing connection and process (legacy, no temp dir).
312    pub(crate) fn from_connection_and_process(connection: CdpConnection, process: Child) -> Self {
313        Self {
314            connection: Arc::new(connection),
315            process: Some(Mutex::new(process)),
316            owned: true,
317            _temp_user_data_dir: None,
318        }
319    }
320
321    /// Create a browser from a launch operation with optional temp directory.
322    pub(crate) fn from_launch(
323        connection: CdpConnection,
324        process: Child,
325        temp_user_data_dir: Option<TempDir>,
326    ) -> Self {
327        Self {
328            connection: Arc::new(connection),
329            process: Some(Mutex::new(process)),
330            owned: true,
331            _temp_user_data_dir: temp_user_data_dir,
332        }
333    }
334
335    /// Create a new isolated browser context.
336    ///
337    /// Browser contexts are isolated environments within the browser,
338    /// similar to incognito windows. They have their own cookies,
339    /// cache, and storage.
340    ///
341    /// # Errors
342    ///
343    /// Returns an error if context creation fails.
344    pub async fn new_context(&self) -> Result<BrowserContext, BrowserError> {
345        let result: CreateBrowserContextResult = self
346            .connection
347            .send_command(
348                "Target.createBrowserContext",
349                Some(CreateBrowserContextParams::default()),
350                None,
351            )
352            .await?;
353
354        Ok(BrowserContext::new(
355            self.connection.clone(),
356            result.browser_context_id,
357        ))
358    }
359
360    /// Create a new context options builder.
361    ///
362    /// Use this to create a browser context with custom configuration.
363    ///
364    /// # Example
365    ///
366    /// ```no_run
367    /// use viewpoint_core::{Browser, Permission};
368    ///
369    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
370    /// let browser = Browser::launch().headless(true).launch().await?;
371    ///
372    /// let context = browser.new_context_builder()
373    ///     .geolocation(37.7749, -122.4194)
374    ///     .permissions(vec![Permission::Geolocation])
375    ///     .offline(false)
376    ///     .build()
377    ///     .await?;
378    /// # Ok(())
379    /// # }
380    /// ```
381    pub fn new_context_builder(&self) -> NewContextBuilder<'_> {
382        NewContextBuilder::new(self)
383    }
384
385    /// Create a new isolated browser context with options.
386    ///
387    /// # Errors
388    ///
389    /// Returns an error if context creation fails.
390    pub async fn new_context_with_options(
391        &self,
392        options: ContextOptions,
393    ) -> Result<BrowserContext, BrowserError> {
394        // Load storage state if specified
395        let storage_state = match &options.storage_state {
396            Some(StorageStateSource::Path(path)) => {
397                Some(StorageState::load(path).await.map_err(|e| {
398                    BrowserError::LaunchFailed(format!("Failed to load storage state: {e}"))
399                })?)
400            }
401            Some(StorageStateSource::State(state)) => Some(state.clone()),
402            None => None,
403        };
404
405        // Build CDP params with proxy configuration if specified
406        let create_params = match &options.proxy {
407            Some(proxy) => CreateBrowserContextParams {
408                dispose_on_detach: None,
409                proxy_server: Some(proxy.server.clone()),
410                proxy_bypass_list: proxy.bypass.clone(),
411            },
412            None => CreateBrowserContextParams::default(),
413        };
414
415        let result: CreateBrowserContextResult = self
416            .connection
417            .send_command("Target.createBrowserContext", Some(create_params), None)
418            .await?;
419
420        let context = BrowserContext::with_options(
421            self.connection.clone(),
422            result.browser_context_id,
423            options,
424        );
425
426        // Apply options
427        context.apply_options().await?;
428
429        // Restore storage state if any
430        if let Some(state) = storage_state {
431            // Restore cookies
432            context.add_cookies(state.cookies.clone()).await?;
433
434            // Restore localStorage via init script
435            let local_storage_script = state.to_local_storage_init_script();
436            if !local_storage_script.is_empty() {
437                context.add_init_script(&local_storage_script).await?;
438            }
439
440            // Restore IndexedDB via init script
441            let indexed_db_script = state.to_indexed_db_init_script();
442            if !indexed_db_script.is_empty() {
443                context.add_init_script(&indexed_db_script).await?;
444            }
445        }
446
447        Ok(context)
448    }
449
450    /// Close the browser.
451    ///
452    /// If this browser was launched by us, the process will be terminated
453    /// and properly reaped to prevent zombie processes.
454    /// If it was connected to, only the WebSocket connection is closed.
455    ///
456    /// # Errors
457    ///
458    /// Returns an error if closing fails.
459    pub async fn close(&self) -> Result<(), BrowserError> {
460        // If we own the process, terminate it and reap it
461        if let Some(ref process_mutex) = self.process {
462            let mut child = process_mutex.lock().await;
463            process::kill_and_reap_async(&mut child).await;
464        }
465
466        Ok(())
467    }
468
469    /// Get a reference to the CDP connection.
470    pub fn connection(&self) -> &Arc<CdpConnection> {
471        &self.connection
472    }
473
474    /// Check if this browser was launched by us.
475    pub fn is_owned(&self) -> bool {
476        self.owned
477    }
478}
479
480impl Drop for Browser {
481    fn drop(&mut self) {
482        // Try to kill and reap the process if we own it
483        if self.owned {
484            if let Some(ref process_mutex) = self.process {
485                // We can't await in drop, so we try to kill synchronously
486                if let Ok(mut guard) = process_mutex.try_lock() {
487                    // Use the sync helper with 10 attempts and 10ms delay between attempts (100ms total)
488                    process::kill_and_reap_sync(&mut guard, 10, Duration::from_millis(10));
489                }
490            }
491        }
492    }
493}