Skip to main content

dravr_browser/
launch.rs

1// ABOUTME: Chrome launch + persistent-profile management for headless automation
2// ABOUTME: Reuses an on-disk profile so cookies (cf_clearance, auth bearers) survive across launches
3//
4// SPDX-License-Identifier: MIT OR Apache-2.0
5// Copyright (c) 2026 dravr.ai
6
7use std::env;
8use std::fs;
9use std::path::PathBuf;
10use std::process;
11use std::time::{SystemTime, UNIX_EPOCH};
12
13use chromiumoxide::browser::{Browser, BrowserConfig};
14use futures::StreamExt;
15use tracing::{debug, info};
16
17use crate::error::{BrowserError, BrowserResult};
18use crate::stealth::{apply_stealth, StealthOptions};
19
/// Name of the environment variable that, when set to a non-empty CDP
/// WebSocket URL, makes [`launch_browser`] attach to an already-running
/// Chrome rather than spawning a new process.
pub const CONNECT_URL_ENV: &str = "DRAVR_BROWSER_CONNECT_URL";
23
/// Settings that control how Chrome is started (or attached to) for automation.
#[derive(Clone, Debug)]
pub struct BrowserLaunchConfig {
    /// Explicit Chrome/Chromium executable. When `None`, chromiumoxide is left
    /// to locate a binary on its own.
    pub chrome_path: Option<String>,
    /// `true` runs Chrome headless; `false` shows a window, which is what a
    /// one-time interactive login needs.
    pub headless: bool,
    /// Root under which persistent profiles live. A launch with a
    /// `profile_id` uses `{profile_base_dir}/{id}`, so cookies and
    /// `localStorage` written there are still present on the next launch.
    pub profile_base_dir: PathBuf,
    /// Value for Chrome's `--proxy-server` switch, if any. Every literal
    /// `{session_id}` in it is replaced by the launch `profile_id`, which lets
    /// a residential proxy keep routing sticky per profile.
    pub proxy_url: Option<String>,
    /// `User-Agent` override. `None` falls back to a built-in string chosen
    /// for the host platform.
    pub user_agent: Option<String>,
}
42
43impl Default for BrowserLaunchConfig {
44    fn default() -> Self {
45        Self {
46            chrome_path: env::var("CHROME_PATH").ok(),
47            headless: true,
48            profile_base_dir: env::var("DRAVR_BROWSER_PROFILE_DIR").map_or_else(
49                |_| env::temp_dir().join("dravr-browser-profiles"),
50                PathBuf::from,
51            ),
52            proxy_url: env::var("DRAVR_BROWSER_PROXY_URL")
53                .ok()
54                .filter(|s| !s.is_empty()),
55            user_agent: env::var("DRAVR_BROWSER_USER_AGENT")
56                .ok()
57                .filter(|s| !s.is_empty()),
58        }
59    }
60}
61
/// Pick the built-in `User-Agent` for the compilation target.
///
/// Linux builds advertise desktop Chrome on Linux; every other target
/// advertises Chrome on macOS. Keeping the claimed platform in line with the
/// host (and therefore with the egress IP) avoids the fingerprint mismatches
/// that make Cloudflare escalate its challenges.
fn default_user_agent() -> &'static str {
    const LINUX_CHROME: &str = concat!(
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ",
        "(KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36",
    );
    const MACOS_CHROME: &str = concat!(
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 ",
        "(KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36",
    );

    if cfg!(not(target_os = "linux")) {
        MACOS_CHROME
    } else {
        LINUX_CHROME
    }
}
73
74/// Launch a Chrome browser with the given configuration.
75///
76/// `profile_id` selects the on-disk profile directory:
77/// - `Some(id)` — reuses `{config.profile_base_dir}/{id}`. Cookies and
78///   storage persist across launches, so an interactive login performed once
79///   keeps the session valid for subsequent headless runs.
80/// - `None` — ephemeral temp profile under `env::temp_dir()`.
81///
82/// If [`CONNECT_URL_ENV`] is set, attaches to that externally-launched Chrome
83/// via CDP instead of spawning a new process.
84pub async fn launch_browser(
85    config: &BrowserLaunchConfig,
86    profile_id: Option<&str>,
87) -> BrowserResult<Browser> {
88    if let Ok(connect_url) = env::var(CONNECT_URL_ENV) {
89        if !connect_url.is_empty() {
90            return connect_browser(&connect_url).await;
91        }
92    }
93
94    let mut builder = BrowserConfig::builder();
95
96    if config.headless {
97        builder = builder.new_headless_mode();
98    } else {
99        builder = builder
100            .with_head()
101            .arg(("disable-features", "WebAuthentication"));
102    }
103
104    let profile_dir = profile_id.map_or_else(ephemeral_profile_dir, |id| {
105        let safe_id: String = id
106            .chars()
107            .map(|c| {
108                if c.is_ascii_alphanumeric() || c == '-' || c == '_' {
109                    c
110                } else {
111                    '_'
112                }
113            })
114            .collect();
115        let dir = config.profile_base_dir.join(&safe_id);
116        match fs::create_dir_all(&dir) {
117            Ok(()) => dir,
118            Err(e) => {
119                debug!(error = %e, dir = %dir.display(), "Failed to create persistent profile dir, falling back to ephemeral");
120                ephemeral_profile_dir()
121            }
122        }
123    });
124
125    // `.hide()` removes the native `navigator.webdriver=true` Blink injects
126    // under headless+CDP and suppresses the automation infobar.
127    builder = builder.hide().arg("no-default-browser-check").arg((
128        "disable-features",
129        "Translate,IsolateOrigins,site-per-process",
130    ));
131
132    let user_agent = config
133        .user_agent
134        .clone()
135        .unwrap_or_else(|| default_user_agent().to_owned());
136
137    builder = builder
138        .arg("disable-gpu")
139        .no_sandbox()
140        .arg(("user-agent", user_agent.as_str()))
141        .user_data_dir(profile_dir)
142        .window_size(1920, 1080);
143
144    if let Some(proxy_url) = config.proxy_url.as_ref() {
145        let resolved = profile_id.map_or_else(
146            || proxy_url.clone(),
147            |id| proxy_url.replace("{session_id}", id),
148        );
149        builder = builder.arg(("proxy-server", resolved.as_str()));
150        debug!(
151            proxy_id = %profile_id.unwrap_or("(ephemeral)"),
152            "Routing browser through proxy"
153        );
154    }
155
156    if let Some(ref path) = config.chrome_path {
157        builder = builder.chrome_executable(path);
158    }
159
160    let browser_config = builder.build().map_err(|e| BrowserError::Browser {
161        reason: format!("Failed to configure browser: {e}"),
162    })?;
163
164    let (browser, mut handler) =
165        Browser::launch(browser_config)
166            .await
167            .map_err(|e| BrowserError::Browser {
168                reason: format!("Failed to launch browser: {e}"),
169            })?;
170
171    tokio::spawn(async move {
172        while let Some(event) = handler.next().await {
173            debug!(?event, "Browser event");
174        }
175    });
176
177    Ok(browser)
178}
179
180/// Connect to an externally-launched Chrome via its CDP WebSocket URL.
181///
182/// `ws_url` is the `webSocketDebuggerUrl` from
183/// `http://127.0.0.1:PORT/json/version` when Chrome was launched with
184/// `--remote-debugging-port=PORT`. The remote browser's existing pages and
185/// cookies remain; callers just open new tabs against the same process.
186pub async fn connect_browser(ws_url: &str) -> BrowserResult<Browser> {
187    info!(ws_url, "Connecting to externally-launched Chrome via CDP");
188    let (browser, mut handler) =
189        Browser::connect(ws_url.to_owned())
190            .await
191            .map_err(|e| BrowserError::Browser {
192                reason: format!("Failed to connect to Chrome at {ws_url}: {e}"),
193            })?;
194    tokio::spawn(async move {
195        while let Some(event) = handler.next().await {
196            debug!(?event, "Browser event");
197        }
198    });
199    Ok(browser)
200}
201
202/// Open a new page with stealth applied before any navigation.
203///
204/// Opens `about:blank`, registers the stealth payload (which fires on every
205/// subsequent frame creation), then navigates to `url`.
206pub async fn open_page_with_stealth(
207    browser: &Browser,
208    url: &str,
209    stealth: &StealthOptions,
210) -> BrowserResult<chromiumoxide::Page> {
211    let page = browser
212        .new_page("about:blank")
213        .await
214        .map_err(|e| BrowserError::Browser {
215            reason: format!("Failed to open blank page: {e}"),
216        })?;
217
218    apply_stealth(&page, stealth).await?;
219
220    page.goto(url).await.map_err(|e| BrowserError::Navigation {
221        reason: format!("Failed to navigate to {url}: {e}"),
222    })?;
223
224    Ok(page)
225}
226
/// Build a fresh ephemeral profile path under `env::temp_dir()`.
///
/// The directory name is `dravr-browser-{pid}-{nanos}-{seq}`: the process id,
/// the nanoseconds elapsed since the Unix epoch, and a per-process sequence
/// number. Keeping the parts separate (rather than summing them) means two
/// different pid/time pairs can never produce the same name, and the
/// sequence number keeps paths distinct even when two calls in one process
/// observe the same clock value — which is what avoids Chrome
/// `SingletonLock` conflicts between concurrent launches.
///
/// Only the path is computed here; this function does not create the
/// directory.
fn ephemeral_profile_dir() -> PathBuf {
    // Monotonic per-process counter; only uniqueness matters, not ordering
    // relative to other memory operations.
    static NEXT_SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    // A clock set before the epoch degrades to 0 instead of failing; the pid
    // and sequence number still keep the name unique.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_nanos();
    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    env::temp_dir().join(format!(
        "dravr-browser-{}-{}-{}",
        process::id(),
        nanos,
        seq
    ))
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// The environment-derived default keeps Chrome headless.
    #[test]
    fn default_config_is_headless() {
        assert!(BrowserLaunchConfig::default().headless);
    }

    /// A `{session_id}` placeholder in a proxy template is replaced by the id.
    #[test]
    fn proxy_placeholder_substitution() {
        let template = "http://user-{session_id}:pass@proxy:1234";
        let resolved = template.replace("{session_id}", "abc");
        assert_eq!(resolved, "http://user-abc:pass@proxy:1234");
    }

    /// Ephemeral profile directories carry the crate's recognisable prefix.
    #[test]
    fn ephemeral_dir_is_unique_prefix() {
        let dir = ephemeral_profile_dir();
        let name = dir.file_name().and_then(|n| n.to_str());
        assert!(name.is_some_and(|n| n.starts_with("dravr-browser-")));
    }
}