viewpoint_core/browser/launcher/
mod.rs

1//! Browser launching functionality.
2
3mod chromium_args;
4mod fs_utils;
5mod user_data;
6
7use std::env;
8use std::io::{BufRead, BufReader};
9use std::path::PathBuf;
10use std::process::{Child, Command, Stdio};
11use std::time::Duration;
12
13use tempfile::TempDir;
14use tokio::time::timeout;
15use tracing::{debug, info, instrument, trace, warn};
16use viewpoint_cdp::CdpConnection;
17
18use super::Browser;
19use crate::error::BrowserError;
20
21pub use user_data::UserDataDir;
22
23use chromium_args::{CHROMIUM_PATHS, STABILITY_ARGS};
24use fs_utils::copy_dir_recursive;
25
/// Default timeout for browser launch.
///
/// Used as the initial `timeout` of a `BrowserBuilder` created with
/// `BrowserBuilder::new`; it can be overridden per builder with
/// `BrowserBuilder::timeout`.
const DEFAULT_LAUNCH_TIMEOUT: Duration = Duration::from_secs(30);
28
/// Builder for launching a browser.
///
/// Configure the launch with the chained setter methods and then call
/// `launch` to spawn Chromium and connect to it.
#[derive(Debug, Clone)]
pub struct BrowserBuilder {
    /// Path to Chromium executable.
    ///
    /// When `None`, the executable is looked up via the `CHROMIUM_PATH`
    /// environment variable and a list of common locations.
    executable_path: Option<PathBuf>,
    /// Whether to run in headless mode.
    headless: bool,
    /// Additional command line arguments, appended after the built-in ones.
    args: Vec<String>,
    /// Launch timeout: how long to wait for the DevTools WebSocket URL.
    timeout: Duration,
    /// User data directory configuration.
    user_data_dir: UserDataDir,
}
43
44impl Default for BrowserBuilder {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50impl BrowserBuilder {
51    /// Create a new browser builder with default settings.
52    ///
53    /// By default, the browser uses an isolated temporary directory for user data.
54    /// This prevents conflicts when running multiple browser instances and ensures
55    /// clean sessions for automation.
56    pub fn new() -> Self {
57        Self {
58            executable_path: None,
59            headless: true,
60            args: Vec::new(),
61            timeout: DEFAULT_LAUNCH_TIMEOUT,
62            user_data_dir: UserDataDir::Temp,
63        }
64    }
65
66    /// Set the path to the Chromium executable.
67    ///
68    /// If not set, the launcher will search common paths and
69    /// check the `CHROMIUM_PATH` environment variable.
70    #[must_use]
71    pub fn executable_path(mut self, path: impl Into<PathBuf>) -> Self {
72        self.executable_path = Some(path.into());
73        self
74    }
75
76    /// Set whether to run in headless mode.
77    ///
78    /// Default is `true`.
79    #[must_use]
80    pub fn headless(mut self, headless: bool) -> Self {
81        self.headless = headless;
82        self
83    }
84
85    /// Add additional command line arguments.
86    #[must_use]
87    pub fn args<I, S>(mut self, args: I) -> Self
88    where
89        I: IntoIterator<Item = S>,
90        S: Into<String>,
91    {
92        self.args.extend(args.into_iter().map(Into::into));
93        self
94    }
95
96    /// Set the launch timeout.
97    ///
98    /// Default is 30 seconds.
99    #[must_use]
100    pub fn timeout(mut self, timeout: Duration) -> Self {
101        self.timeout = timeout;
102        self
103    }
104
105    /// Set a persistent user data directory for browser profile.
106    ///
107    /// When set, browser state (cookies, localStorage, settings) persists
108    /// in the specified directory across browser restarts. The directory
109    /// is NOT cleaned up when the browser closes.
110    ///
111    /// **Note**: Using the same directory for multiple concurrent browser
112    /// instances will cause profile lock conflicts.
113    ///
114    /// # Example
115    ///
116    /// ```no_run
117    /// use viewpoint_core::Browser;
118    ///
119    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
120    /// let browser = Browser::launch()
121    ///     .user_data_dir("/path/to/profile")
122    ///     .launch()
123    ///     .await?;
124    /// # Ok(())
125    /// # }
126    /// ```
127    #[must_use]
128    pub fn user_data_dir(mut self, path: impl Into<PathBuf>) -> Self {
129        self.user_data_dir = UserDataDir::Persist(path.into());
130        self
131    }
132
133    /// Use the system default profile directory.
134    ///
135    /// On Linux, this is typically `~/.config/chromium/`.
136    /// No `--user-data-dir` flag is passed to Chromium.
137    ///
138    /// **Warning**: This can cause conflicts if another Chromium instance is running,
139    /// or if a previous session crashed without proper cleanup. Prefer the default
140    /// isolated temp profile for automation scenarios.
141    ///
142    /// # Example
143    ///
144    /// ```no_run
145    /// use viewpoint_core::Browser;
146    ///
147    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
148    /// let browser = Browser::launch()
149    ///     .user_data_dir_system()
150    ///     .launch()
151    ///     .await?;
152    /// # Ok(())
153    /// # }
154    /// ```
155    #[must_use]
156    pub fn user_data_dir_system(mut self) -> Self {
157        self.user_data_dir = UserDataDir::System;
158        self
159    }
160
161    /// Use a template profile copied to a temporary directory.
162    ///
163    /// The contents of the template directory are copied to a new temporary
164    /// directory. This allows starting with pre-configured settings, extensions,
165    /// or cookies while maintaining isolation between sessions.
166    ///
167    /// The temporary directory is automatically cleaned up when the browser
168    /// closes or is dropped. The original template directory is unchanged.
169    ///
170    /// # Example
171    ///
172    /// ```no_run
173    /// use viewpoint_core::Browser;
174    ///
175    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
176    /// // Create a browser with extensions from a template profile
177    /// let browser = Browser::launch()
178    ///     .user_data_dir_template_from("/path/to/template-profile")
179    ///     .launch()
180    ///     .await?;
181    /// # Ok(())
182    /// # }
183    /// ```
184    ///
185    /// # Loading Extensions
186    ///
187    /// Extensions can also be loaded at runtime without a template profile:
188    ///
189    /// ```no_run
190    /// use viewpoint_core::Browser;
191    ///
192    /// # async fn example() -> Result<(), viewpoint_core::CoreError> {
193    /// let browser = Browser::launch()
194    ///     .args(["--load-extension=/path/to/unpacked-extension"])
195    ///     .launch()
196    ///     .await?;
197    /// # Ok(())
198    /// # }
199    /// ```
200    #[must_use]
201    pub fn user_data_dir_template_from(mut self, template_path: impl Into<PathBuf>) -> Self {
202        self.user_data_dir = UserDataDir::TempFromTemplate(template_path.into());
203        self
204    }
205
206    /// Launch the browser.
207    ///
208    /// # Errors
209    ///
210    /// Returns an error if:
211    /// - Chromium is not found
212    /// - The process fails to spawn
213    /// - The browser doesn't start within the timeout
214    /// - Template directory doesn't exist or can't be copied
215    #[instrument(level = "info", skip(self), fields(headless = self.headless, timeout_ms = self.timeout.as_millis()))]
216    pub async fn launch(self) -> Result<Browser, BrowserError> {
217        info!("Launching browser");
218
219        let executable = self.find_executable()?;
220        info!(executable = %executable.display(), "Found Chromium executable");
221
222        // Handle user data directory configuration
223        let (user_data_path, temp_dir) = self.prepare_user_data_dir()?;
224
225        let mut cmd = Command::new(&executable);
226
227        // Add default arguments
228        cmd.arg("--remote-debugging-port=0");
229
230        if self.headless {
231            cmd.arg("--headless=new");
232            debug!("Running in headless mode");
233        } else {
234            debug!("Running in headed mode");
235        }
236
237        // Add common stability flags
238        cmd.args(STABILITY_ARGS);
239        trace!(arg_count = STABILITY_ARGS.len(), "Added stability flags");
240
241        // Add user data directory if we have one
242        if let Some(ref user_data_dir) = user_data_path {
243            cmd.arg(format!("--user-data-dir={}", user_data_dir.display()));
244            debug!(user_data_dir = %user_data_dir.display(), "Using user data directory");
245        } else {
246            debug!("Using system default user data directory");
247        }
248
249        // Add user arguments
250        if !self.args.is_empty() {
251            cmd.args(&self.args);
252            debug!(user_args = ?self.args, "Added user arguments");
253        }
254
255        // Capture stderr for the WebSocket URL
256        cmd.stderr(Stdio::piped());
257        cmd.stdout(Stdio::null());
258
259        info!("Spawning Chromium process");
260        let mut child = cmd.spawn().map_err(|e| {
261            warn!(error = %e, "Failed to spawn Chromium process");
262            BrowserError::LaunchFailed(e.to_string())
263        })?;
264
265        let pid = child.id();
266        info!(pid = pid, "Chromium process spawned");
267
268        // Read the WebSocket URL from stderr
269        debug!("Waiting for DevTools WebSocket URL");
270        let ws_url = timeout(self.timeout, Self::read_ws_url(&mut child))
271            .await
272            .map_err(|_| {
273                warn!(
274                    timeout_ms = self.timeout.as_millis(),
275                    "Browser launch timed out"
276                );
277                BrowserError::LaunchTimeout(self.timeout)
278            })??;
279
280        info!(ws_url = %ws_url, "Got DevTools WebSocket URL");
281
282        // Connect to the browser
283        debug!("Connecting to browser via CDP");
284        let connection = CdpConnection::connect(&ws_url).await?;
285
286        // Enable target discovery to receive Target.targetCreated events
287        // This is required for automatic page tracking (popups, target="_blank" links)
288        debug!("Enabling target discovery");
289        connection
290            .send_command::<_, serde_json::Value>(
291                "Target.setDiscoverTargets",
292                Some(
293                    viewpoint_cdp::protocol::target_domain::SetDiscoverTargetsParams {
294                        discover: true,
295                    },
296                ),
297                None,
298            )
299            .await
300            .map_err(|e| {
301                BrowserError::LaunchFailed(format!("Failed to enable target discovery: {e}"))
302            })?;
303
304        info!(pid = pid, "Browser launched and connected successfully");
305        Ok(Browser::from_launch(connection, child, temp_dir))
306    }
307
308    /// Prepare the user data directory based on configuration.
309    ///
310    /// Returns the path to use for `--user-data-dir` (if any) and an optional
311    /// `TempDir` handle that should be stored in the `Browser` struct to ensure
312    /// cleanup on drop.
313    fn prepare_user_data_dir(&self) -> Result<(Option<PathBuf>, Option<TempDir>), BrowserError> {
314        match &self.user_data_dir {
315            UserDataDir::Temp => {
316                // Create a unique temporary directory
317                let temp_dir = TempDir::with_prefix("viewpoint-browser-").map_err(|e| {
318                    BrowserError::LaunchFailed(format!(
319                        "Failed to create temporary user data directory: {e}"
320                    ))
321                })?;
322                let path = temp_dir.path().to_path_buf();
323                debug!(path = %path.display(), "Created temporary user data directory");
324                Ok((Some(path), Some(temp_dir)))
325            }
326            UserDataDir::TempFromTemplate(template_path) => {
327                // Validate template exists
328                if !template_path.exists() {
329                    return Err(BrowserError::LaunchFailed(format!(
330                        "Template profile directory does not exist: {}",
331                        template_path.display()
332                    )));
333                }
334                if !template_path.is_dir() {
335                    return Err(BrowserError::LaunchFailed(format!(
336                        "Template profile path is not a directory: {}",
337                        template_path.display()
338                    )));
339                }
340
341                // Create temporary directory
342                let temp_dir = TempDir::with_prefix("viewpoint-browser-").map_err(|e| {
343                    BrowserError::LaunchFailed(format!(
344                        "Failed to create temporary user data directory: {e}"
345                    ))
346                })?;
347                let dest_path = temp_dir.path().to_path_buf();
348
349                // Copy template contents to temp directory
350                debug!(
351                    template = %template_path.display(),
352                    dest = %dest_path.display(),
353                    "Copying template profile to temporary directory"
354                );
355                copy_dir_recursive(template_path, &dest_path).map_err(|e| {
356                    BrowserError::LaunchFailed(format!("Failed to copy template profile: {e}"))
357                })?;
358
359                info!(
360                    template = %template_path.display(),
361                    dest = %dest_path.display(),
362                    "Template profile copied to temporary directory"
363                );
364                Ok((Some(dest_path), Some(temp_dir)))
365            }
366            UserDataDir::Persist(path) => {
367                // Use the specified path, no cleanup
368                debug!(path = %path.display(), "Using persistent user data directory");
369                Ok((Some(path.clone()), None))
370            }
371            UserDataDir::System => {
372                // No --user-data-dir flag, use system default
373                debug!("Using system default user data directory");
374                Ok((None, None))
375            }
376        }
377    }
378
379    /// Find the Chromium executable.
380    #[instrument(level = "debug", skip(self))]
381    fn find_executable(&self) -> Result<PathBuf, BrowserError> {
382        // Check if explicitly set
383        if let Some(ref path) = self.executable_path {
384            debug!(path = %path.display(), "Checking explicit executable path");
385            if path.exists() {
386                info!(path = %path.display(), "Using explicit executable path");
387                return Ok(path.clone());
388            }
389            warn!(path = %path.display(), "Explicit executable path does not exist");
390            return Err(BrowserError::ChromiumNotFound);
391        }
392
393        // Check environment variable
394        if let Ok(path_str) = env::var("CHROMIUM_PATH") {
395            let path = PathBuf::from(&path_str);
396            debug!(path = %path.display(), "Checking CHROMIUM_PATH environment variable");
397            if path.exists() {
398                info!(path = %path.display(), "Using CHROMIUM_PATH");
399                return Ok(path);
400            }
401            warn!(path = %path.display(), "CHROMIUM_PATH does not exist");
402        }
403
404        // Search common paths
405        debug!("Searching common Chromium paths");
406        for path_str in CHROMIUM_PATHS {
407            let path = PathBuf::from(path_str);
408            if path.exists() {
409                info!(path = %path.display(), "Found Chromium at common path");
410                return Ok(path);
411            }
412
413            // Also try which/where
414            if let Ok(output) = Command::new("which").arg(path_str).output() {
415                if output.status.success() {
416                    let found = String::from_utf8_lossy(&output.stdout).trim().to_string();
417                    if !found.is_empty() {
418                        let found_path = PathBuf::from(&found);
419                        info!(path = %found_path.display(), "Found Chromium via 'which'");
420                        return Ok(found_path);
421                    }
422                }
423            }
424        }
425
426        warn!("Chromium not found in any expected location");
427        Err(BrowserError::ChromiumNotFound)
428    }
429
430    /// Read the WebSocket URL from the browser's stderr.
431    async fn read_ws_url(child: &mut Child) -> Result<String, BrowserError> {
432        let stderr = child
433            .stderr
434            .take()
435            .ok_or_else(|| BrowserError::LaunchFailed("failed to capture stderr".into()))?;
436
437        // Spawn blocking read in a separate task
438        let handle = tokio::task::spawn_blocking(move || {
439            let reader = BufReader::new(stderr);
440
441            for line in reader.lines() {
442                let Ok(line) = line else { continue };
443
444                trace!(line = %line, "Read line from Chromium stderr");
445
446                // Look for "DevTools listening on ws://..."
447                if let Some(pos) = line.find("DevTools listening on ") {
448                    let url = &line[pos + 22..];
449                    return Some(url.trim().to_string());
450                }
451            }
452
453            None
454        });
455
456        handle
457            .await
458            .map_err(|e| BrowserError::LaunchFailed(e.to_string()))?
459            .ok_or(BrowserError::LaunchFailed(
460                "failed to find WebSocket URL in browser output".into(),
461            ))
462    }
463}