viewpoint_core/browser/launcher/
mod.rs

1//! Browser launching functionality.
2
3mod chromium_args;
4mod fs_utils;
5mod user_data;
6
7use std::env;
8use std::io::{BufRead, BufReader};
9use std::path::PathBuf;
10use std::process::{Child, Command, Stdio};
11use std::time::Duration;
12
13use tempfile::TempDir;
14use tokio::time::timeout;
15use tracing::{debug, info, instrument, trace, warn};
16use viewpoint_cdp::CdpConnection;
17
18use super::Browser;
19use crate::error::BrowserError;
20
21pub use user_data::UserDataDir;
22
23use chromium_args::{CHROMIUM_PATHS, STABILITY_ARGS};
24use fs_utils::copy_dir_recursive;
25
/// Launch timeout applied when the caller does not configure one via
/// [`BrowserBuilder::timeout`]: thirty seconds.
const DEFAULT_LAUNCH_TIMEOUT: Duration = Duration::from_secs(30);
28
29/// Builder for launching a browser.
30#[derive(Debug, Clone)]
31pub struct BrowserBuilder {
32    /// Path to Chromium executable.
33    executable_path: Option<PathBuf>,
34    /// Whether to run in headless mode.
35    headless: bool,
36    /// Additional command line arguments.
37    args: Vec<String>,
38    /// Launch timeout.
39    timeout: Duration,
40    /// User data directory configuration.
41    user_data_dir: UserDataDir,
42}
43
44impl Default for BrowserBuilder {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl BrowserBuilder {
51    /// Create a new browser builder with default settings.
52    ///
53    /// By default, the browser uses an isolated temporary directory for user data.
54    /// This prevents conflicts when running multiple browser instances and ensures
55    /// clean sessions for automation.
56    pub fn new() -> Self {
57        Self {
58            executable_path: None,
59            headless: true,
60            args: Vec::new(),
61            timeout: DEFAULT_LAUNCH_TIMEOUT,
62            user_data_dir: UserDataDir::Temp,
63        }
64    }
65
66    /// Set the path to the Chromium executable.
67    ///
68    /// If not set, the launcher will search common paths and
69    /// check the `CHROMIUM_PATH` environment variable.
70    #[must_use]
71    pub fn executable_path(mut self, path: impl Into<PathBuf>) -> Self {
72        self.executable_path = Some(path.into());
73        self
74    }
75
76    /// Set whether to run in headless mode.
77    ///
78    /// Default is `true`.
79    #[must_use]
80    pub fn headless(mut self, headless: bool) -> Self {
81        self.headless = headless;
82        self
83    }
84
85    /// Add additional command line arguments.
86    #[must_use]
87    pub fn args<I, S>(mut self, args: I) -> Self
88    where
89        I: IntoIterator<Item = S>,
90        S: Into<String>,
91    {
92        self.args.extend(args.into_iter().map(Into::into));
93        self
94    }
95
96    /// Set the launch timeout.
97    ///
98    /// Default is 30 seconds.
99    #[must_use]
100    pub fn timeout(mut self, timeout: Duration) -> Self {
101        self.timeout = timeout;
102        self
103    }
104
105    /// Set a persistent user data directory for browser profile.
106    ///
107    /// When set, browser state (cookies, localStorage, settings) persists
108    /// in the specified directory across browser restarts. The directory
109    /// is NOT cleaned up when the browser closes.
110    ///
111    /// **Note**: Using the same directory for multiple concurrent browser
112    /// instances will cause profile lock conflicts.
113    ///
114    /// # Example
115    ///
116    /// ```no_run
117    /// use viewpoint_core::Browser;
118    ///
119    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
120    /// let browser = Browser::launch()
121    ///     .user_data_dir("/path/to/profile")
122    ///     .launch()
123    ///     .await?;
124    /// # Ok(())
125    /// # }
126    /// ```
127    #[must_use]
128    pub fn user_data_dir(mut self, path: impl Into<PathBuf>) -> Self {
129        self.user_data_dir = UserDataDir::Persist(path.into());
130        self
131    }
132
133    /// Use the system default profile directory.
134    ///
135    /// On Linux, this is typically `~/.config/chromium/`.
136    /// No `--user-data-dir` flag is passed to Chromium.
137    ///
138    /// **Warning**: This can cause conflicts if another Chromium instance is running,
139    /// or if a previous session crashed without proper cleanup. Prefer the default
140    /// isolated temp profile for automation scenarios.
141    ///
142    /// # Example
143    ///
144    /// ```no_run
145    /// use viewpoint_core::Browser;
146    ///
147    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
148    /// let browser = Browser::launch()
149    ///     .user_data_dir_system()
150    ///     .launch()
151    ///     .await?;
152    /// # Ok(())
153    /// # }
154    /// ```
155    #[must_use]
156    pub fn user_data_dir_system(mut self) -> Self {
157        self.user_data_dir = UserDataDir::System;
158        self
159    }
160
161    /// Use a template profile copied to a temporary directory.
162    ///
163    /// The contents of the template directory are copied to a new temporary
164    /// directory. This allows starting with pre-configured settings, extensions,
165    /// or cookies while maintaining isolation between sessions.
166    ///
167    /// The temporary directory is automatically cleaned up when the browser
168    /// closes or is dropped. The original template directory is unchanged.
169    ///
170    /// # Example
171    ///
172    /// ```no_run
173    /// use viewpoint_core::Browser;
174    ///
175    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
176    /// // Create a browser with extensions from a template profile
177    /// let browser = Browser::launch()
178    ///     .user_data_dir_template_from("/path/to/template-profile")
179    ///     .launch()
180    ///     .await?;
181    /// # Ok(())
182    /// # }
183    /// ```
184    ///
185    /// # Loading Extensions
186    ///
187    /// Extensions can also be loaded at runtime without a template profile:
188    ///
189    /// ```no_run
190    /// use viewpoint_core::Browser;
191    ///
192    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
193    /// let browser = Browser::launch()
194    ///     .args(["--load-extension=/path/to/unpacked-extension"])
195    ///     .launch()
196    ///     .await?;
197    /// # Ok(())
198    /// # }
199    /// ```
200    #[must_use]
201    pub fn user_data_dir_template_from(mut self, template_path: impl Into<PathBuf>) -> Self {
202        self.user_data_dir = UserDataDir::TempFromTemplate(template_path.into());
203        self
204    }
205
206    /// Launch the browser.
207    ///
208    /// # Errors
209    ///
210    /// Returns an error if:
211    /// - Chromium is not found
212    /// - The process fails to spawn
213    /// - The browser doesn't start within the timeout
214    /// - Template directory doesn't exist or can't be copied
215    #[instrument(level = "info", skip(self), fields(headless = self.headless, timeout_ms = self.timeout.as_millis()))]
216    pub async fn launch(self) -> Result<Browser, BrowserError> {
217        info!("Launching browser");
218
219        let executable = self.find_executable()?;
220        info!(executable = %executable.display(), "Found Chromium executable");
221
222        // Handle user data directory configuration
223        let (user_data_path, temp_dir) = self.prepare_user_data_dir()?;
224
225        let mut cmd = Command::new(&executable);
226
227        // Add default arguments
228        cmd.arg("--remote-debugging-port=0");
229
230        if self.headless {
231            cmd.arg("--headless=new");
232            debug!("Running in headless mode");
233        } else {
234            debug!("Running in headed mode");
235        }
236
237        // Add common stability flags
238        cmd.args(STABILITY_ARGS);
239        trace!(arg_count = STABILITY_ARGS.len(), "Added stability flags");
240
241        // Add user data directory if we have one
242        if let Some(ref user_data_dir) = user_data_path {
243            cmd.arg(format!("--user-data-dir={}", user_data_dir.display()));
244            debug!(user_data_dir = %user_data_dir.display(), "Using user data directory");
245        } else {
246            debug!("Using system default user data directory");
247        }
248
249        // Add user arguments
250        if !self.args.is_empty() {
251            cmd.args(&self.args);
252            debug!(user_args = ?self.args, "Added user arguments");
253        }
254
255        // Capture stderr for the WebSocket URL
256        cmd.stderr(Stdio::piped());
257        cmd.stdout(Stdio::null());
258
259        info!("Spawning Chromium process");
260        let mut child = cmd.spawn().map_err(|e| {
261            warn!(error = %e, "Failed to spawn Chromium process");
262            BrowserError::LaunchFailed(e.to_string())
263        })?;
264
265        let pid = child.id();
266        info!(pid = pid, "Chromium process spawned");
267
268        // Read the WebSocket URL from stderr
269        debug!("Waiting for DevTools WebSocket URL");
270        let ws_url = timeout(self.timeout, Self::read_ws_url(&mut child))
271            .await
272            .map_err(|_| {
273                warn!(
274                    timeout_ms = self.timeout.as_millis(),
275                    "Browser launch timed out"
276                );
277                BrowserError::LaunchTimeout(self.timeout)
278            })??;
279
280        info!(ws_url = %ws_url, "Got DevTools WebSocket URL");
281
282        // Connect to the browser
283        debug!("Connecting to browser via CDP");
284        let connection = CdpConnection::connect(&ws_url).await?;
285
286        // Enable target discovery to receive Target.targetCreated events
287        // This is required for automatic page tracking (popups, target="_blank" links)
288        debug!("Enabling target discovery");
289        connection
290            .send_command::<_, serde_json::Value>(
291                "Target.setDiscoverTargets",
292                Some(viewpoint_cdp::protocol::target_domain::SetDiscoverTargetsParams {
293                    discover: true,
294                }),
295                None,
296            )
297            .await
298            .map_err(|e| BrowserError::LaunchFailed(format!("Failed to enable target discovery: {e}")))?;
299
300        info!(pid = pid, "Browser launched and connected successfully");
301        Ok(Browser::from_launch(connection, child, temp_dir))
302    }
303
304    /// Prepare the user data directory based on configuration.
305    ///
306    /// Returns the path to use for `--user-data-dir` (if any) and an optional
307    /// `TempDir` handle that should be stored in the `Browser` struct to ensure
308    /// cleanup on drop.
309    fn prepare_user_data_dir(&self) -> Result<(Option<PathBuf>, Option<TempDir>), BrowserError> {
310        match &self.user_data_dir {
311            UserDataDir::Temp => {
312                // Create a unique temporary directory
313                let temp_dir = TempDir::with_prefix("viewpoint-browser-").map_err(|e| {
314                    BrowserError::LaunchFailed(format!(
315                        "Failed to create temporary user data directory: {e}"
316                    ))
317                })?;
318                let path = temp_dir.path().to_path_buf();
319                debug!(path = %path.display(), "Created temporary user data directory");
320                Ok((Some(path), Some(temp_dir)))
321            }
322            UserDataDir::TempFromTemplate(template_path) => {
323                // Validate template exists
324                if !template_path.exists() {
325                    return Err(BrowserError::LaunchFailed(format!(
326                        "Template profile directory does not exist: {}",
327                        template_path.display()
328                    )));
329                }
330                if !template_path.is_dir() {
331                    return Err(BrowserError::LaunchFailed(format!(
332                        "Template profile path is not a directory: {}",
333                        template_path.display()
334                    )));
335                }
336
337                // Create temporary directory
338                let temp_dir = TempDir::with_prefix("viewpoint-browser-").map_err(|e| {
339                    BrowserError::LaunchFailed(format!(
340                        "Failed to create temporary user data directory: {e}"
341                    ))
342                })?;
343                let dest_path = temp_dir.path().to_path_buf();
344
345                // Copy template contents to temp directory
346                debug!(
347                    template = %template_path.display(),
348                    dest = %dest_path.display(),
349                    "Copying template profile to temporary directory"
350                );
351                copy_dir_recursive(template_path, &dest_path).map_err(|e| {
352                    BrowserError::LaunchFailed(format!("Failed to copy template profile: {e}"))
353                })?;
354
355                info!(
356                    template = %template_path.display(),
357                    dest = %dest_path.display(),
358                    "Template profile copied to temporary directory"
359                );
360                Ok((Some(dest_path), Some(temp_dir)))
361            }
362            UserDataDir::Persist(path) => {
363                // Use the specified path, no cleanup
364                debug!(path = %path.display(), "Using persistent user data directory");
365                Ok((Some(path.clone()), None))
366            }
367            UserDataDir::System => {
368                // No --user-data-dir flag, use system default
369                debug!("Using system default user data directory");
370                Ok((None, None))
371            }
372        }
373    }
374
375    /// Find the Chromium executable.
376    #[instrument(level = "debug", skip(self))]
377    fn find_executable(&self) -> Result<PathBuf, BrowserError> {
378        // Check if explicitly set
379        if let Some(ref path) = self.executable_path {
380            debug!(path = %path.display(), "Checking explicit executable path");
381            if path.exists() {
382                info!(path = %path.display(), "Using explicit executable path");
383                return Ok(path.clone());
384            }
385            warn!(path = %path.display(), "Explicit executable path does not exist");
386            return Err(BrowserError::ChromiumNotFound);
387        }
388
389        // Check environment variable
390        if let Ok(path_str) = env::var("CHROMIUM_PATH") {
391            let path = PathBuf::from(&path_str);
392            debug!(path = %path.display(), "Checking CHROMIUM_PATH environment variable");
393            if path.exists() {
394                info!(path = %path.display(), "Using CHROMIUM_PATH");
395                return Ok(path);
396            }
397            warn!(path = %path.display(), "CHROMIUM_PATH does not exist");
398        }
399
400        // Search common paths
401        debug!("Searching common Chromium paths");
402        for path_str in CHROMIUM_PATHS {
403            let path = PathBuf::from(path_str);
404            if path.exists() {
405                info!(path = %path.display(), "Found Chromium at common path");
406                return Ok(path);
407            }
408
409            // Also try which/where
410            if let Ok(output) = Command::new("which").arg(path_str).output() {
411                if output.status.success() {
412                    let found = String::from_utf8_lossy(&output.stdout).trim().to_string();
413                    if !found.is_empty() {
414                        let found_path = PathBuf::from(&found);
415                        info!(path = %found_path.display(), "Found Chromium via 'which'");
416                        return Ok(found_path);
417                    }
418                }
419            }
420        }
421
422        warn!("Chromium not found in any expected location");
423        Err(BrowserError::ChromiumNotFound)
424    }
425
426    /// Read the WebSocket URL from the browser's stderr.
427    async fn read_ws_url(child: &mut Child) -> Result<String, BrowserError> {
428        let stderr = child
429            .stderr
430            .take()
431            .ok_or_else(|| BrowserError::LaunchFailed("failed to capture stderr".into()))?;
432
433        // Spawn blocking read in a separate task
434        let handle = tokio::task::spawn_blocking(move || {
435            let reader = BufReader::new(stderr);
436
437            for line in reader.lines() {
438                let Ok(line) = line else { continue };
439
440                trace!(line = %line, "Read line from Chromium stderr");
441
442                // Look for "DevTools listening on ws://..."
443                if let Some(pos) = line.find("DevTools listening on ") {
444                    let url = &line[pos + 22..];
445                    return Some(url.trim().to_string());
446                }
447            }
448
449            None
450        });
451
452        handle
453            .await
454            .map_err(|e| BrowserError::LaunchFailed(e.to_string()))?
455            .ok_or(BrowserError::LaunchFailed(
456                "failed to find WebSocket URL in browser output".into(),
457            ))
458    }
459}