Skip to main content

chaser_cf/core/
browser.rs

1//! Browser management for chaser-cf
2
3use crate::error::{ChaserError, ChaserResult};
4use crate::models::ProxyConfig;
5
6use chaser_oxide::cdp::browser_protocol::target::CreateTargetParams;
7use chaser_oxide::{Browser, BrowserConfig, ChaserPage};
8use futures::StreamExt;
9use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
10use std::sync::Arc;
11use tokio::sync::Semaphore;
12
/// Normalize a Chrome-style flag for chaser-oxide's `ArgsBuilder`.
///
/// chaser-oxide stores arg keys WITHOUT the leading `--` and prepends
/// `--` itself at command-build time. Passing pre-formatted strings
/// (`"--no-sandbox"`) produces `"----no-sandbox"`, which Chrome silently
/// ignores. This helper strips surrounding whitespace and then any leading
/// dash chars so the rendered command-line argument comes out correctly:
///
///   "--no-sandbox"           -> "no-sandbox"           -> "--no-sandbox"
///   "--key=value"            -> "key=value"            -> "--key=value"
///   "no-sandbox"             -> "no-sandbox"           -> "--no-sandbox"
///   "  --no-sandbox\n"       -> "no-sandbox"           -> "--no-sandbox"
///
/// Whitespace is trimmed first because a flag such as `" --no-sandbox"`
/// would otherwise keep its dashes and render as `"-- --no-sandbox"`.
/// Whitespace inside the flag (e.g. in a `key=some value` payload) is left
/// untouched.
///
/// Both `--key=value` and `--key value` chrome-flag forms are supported
/// (the renderer just emits the stored key verbatim with a `--` prefix).
///
/// Returns an empty string when `raw` contains nothing but whitespace
/// and/or dashes.
pub(crate) fn normalize_chrome_flag(raw: &str) -> String {
    raw.trim().trim_start_matches('-').to_owned()
}
30
/// Owns the launched browser together with the bookkeeping used to cap and
/// observe concurrently open browser contexts.
pub struct BrowserManager {
    /// Handle to the launched browser; used to create contexts and pages.
    browser: Browser,
    /// Limits how many context permits may be outstanding at once.
    context_semaphore: Arc<Semaphore>,
    /// Number of `ContextPermit`s currently alive.
    active_contexts: Arc<AtomicUsize>,
    /// Permit capacity the semaphore was created with.
    max_contexts: usize,
    /// Cleared when the browser's event stream ends or `shutdown` is called.
    healthy: Arc<AtomicBool>,
    /// Xvfb server process; `Some` only when a virtual display was started.
    #[cfg(target_os = "linux")]
    xvfb: Option<std::process::Child>,
}
40
41impl BrowserManager {
42    pub async fn new(config: &super::ChaserConfig) -> ChaserResult<Self> {
43        // Baseline flags chaser-cf always sets, plus any extras the caller
44        // configured via ChaserConfig::with_extra_args / add_extra_arg /
45        // CHASER_EXTRA_ARGS env var. Common extras: --no-sandbox (when the
46        // host process runs as root), --disable-gpu, --disable-dev-shm-usage.
47        //
48        // Every flag goes through normalize_chrome_flag, which strips the
49        // leading `--` so chaser-oxide's ArgsBuilder doesn't double-render
50        // it as `----flag`. The original chaser-cf 0.1.0..0.1.4 baseline
51        // flags hit this exact bug and were silently ignored by Chrome
52        // for the entire lifetime of those releases.
53        // On Linux, optionally start an Xvfb virtual display and run Chrome
54        // headed inside it. This avoids all headless-detection heuristics at
55        // the cost of needing `Xvfb` installed (`apt install xvfb`).
56        #[cfg(target_os = "linux")]
57        let xvfb = if config.virtual_display {
58            let display = find_free_display();
59            let display_str = format!(":{display}");
60            let child = std::process::Command::new("Xvfb")
61                .args([
62                    &display_str,
63                    "-screen",
64                    "0",
65                    "1920x1080x24",
66                    "-ac",
67                    "+extension",
68                    "GLX",
69                    "+render",
70                    "-noreset",
71                ])
72                .spawn()
73                .map_err(|e| {
74                    ChaserError::InitFailed(format!(
75                        "Xvfb: {e}. Is xvfb installed? (apt install xvfb)"
76                    ))
77                })?;
78            // Give Xvfb time to open the socket before Chrome connects.
79            std::thread::sleep(std::time::Duration::from_millis(400));
80            // SAFETY: set_var is unsafe in multi-threaded code; we do this once
81            // at init before any page tasks are spawned so there are no races.
82            unsafe { std::env::set_var("DISPLAY", &display_str) };
83            Some(child)
84        } else {
85            None
86        };
87
88        let mut chrome_args: Vec<String> = vec![
89            normalize_chrome_flag("--disable-blink-features=AutomationControlled"),
90            normalize_chrome_flag("--disable-infobars"),
91        ];
92
93        // On Linux headless: set window to 1920×1080 so inner/outer dimensions
94        // match the screen dimensions we set via setDeviceMetricsOverride.
95        // Also disable WebGL — headless uses SwiftShader (software rasterizer)
96        // which is a detectable bot signal. Xvfb mode has no WebGL context at
97        // all and passes CF challenges fine, so headless matches that behaviour.
98        #[cfg(target_os = "linux")]
99        if config.headless && !config.virtual_display {
100            chrome_args.push(normalize_chrome_flag("--window-size=1920,1080"));
101            chrome_args.push(normalize_chrome_flag("--disable-webgl"));
102            chrome_args.push(normalize_chrome_flag("--disable-webgl2"));
103        }
104
105        chrome_args.extend(config.extra_args.iter().map(|a| normalize_chrome_flag(a)));
106
107        let mut builder = BrowserConfig::builder().viewport(None).args(chrome_args);
108
109        if let Some(ref path) = config.chrome_path {
110            builder = builder.chrome_executable(path.clone());
111        }
112
113        // Virtual display implies headed — headless flag is ignored when xvfb is active.
114        #[cfg(target_os = "linux")]
115        let use_headless = config.headless && !config.virtual_display;
116        #[cfg(not(target_os = "linux"))]
117        let use_headless = config.headless;
118
119        if !use_headless {
120            builder = builder.with_head();
121        } else {
122            builder = builder.new_headless_mode();
123        }
124
125        let browser_config = builder
126            .build()
127            .map_err(|e| ChaserError::InitFailed(e.to_string()))?;
128
129        let (browser, mut handler) = Browser::launch(browser_config)
130            .await
131            .map_err(|e| ChaserError::InitFailed(e.to_string()))?;
132
133        let healthy = Arc::new(AtomicBool::new(true));
134        let healthy_clone = healthy.clone();
135        tokio::spawn(async move {
136            loop {
137                match handler.next().await {
138                    Some(_) => {}
139                    None => {
140                        healthy_clone.store(false, Ordering::SeqCst);
141                        break;
142                    }
143                }
144            }
145        });
146
147        Ok(Self {
148            browser,
149            context_semaphore: Arc::new(Semaphore::new(config.context_limit)),
150            active_contexts: Arc::new(AtomicUsize::new(0)),
151            max_contexts: config.context_limit,
152            healthy,
153            #[cfg(target_os = "linux")]
154            xvfb,
155        })
156    }
157
158    pub fn is_healthy(&self) -> bool {
159        self.healthy.load(Ordering::SeqCst)
160    }
161
162    pub fn active_contexts(&self) -> usize {
163        self.active_contexts.load(Ordering::SeqCst)
164    }
165
166    pub fn max_contexts(&self) -> usize {
167        self.max_contexts
168    }
169
170    pub async fn acquire_permit(&self) -> ChaserResult<ContextPermit> {
171        let permit = self
172            .context_semaphore
173            .clone()
174            .acquire_owned()
175            .await
176            .map_err(|_| ChaserError::ContextFailed("Semaphore closed".to_string()))?;
177
178        self.active_contexts.fetch_add(1, Ordering::SeqCst);
179
180        Ok(ContextPermit {
181            _permit: permit,
182            active_contexts: self.active_contexts.clone(),
183        })
184    }
185
186    pub fn try_acquire_permit(&self) -> Option<ContextPermit> {
187        let permit = self.context_semaphore.clone().try_acquire_owned().ok()?;
188        self.active_contexts.fetch_add(1, Ordering::SeqCst);
189        Some(ContextPermit {
190            _permit: permit,
191            active_contexts: self.active_contexts.clone(),
192        })
193    }
194
195    pub async fn create_context(
196        &self,
197        proxy: Option<&ProxyConfig>,
198    ) -> ChaserResult<Option<chaser_oxide::cdp::browser_protocol::browser::BrowserContextId>> {
199        match proxy {
200            Some(p) => {
201                let ctx_id = self
202                    .browser
203                    .create_incognito_context_with_proxy(p.to_url())
204                    .await
205                    .map_err(|e| ChaserError::ContextFailed(e.to_string()))?;
206                Ok(Some(ctx_id))
207            }
208            None => Ok(None),
209        }
210    }
211
212    /// Open a blank page, apply the native profile (OS + real Chrome version), then
213    /// navigate to `url`. Proxy auth is handled by the caller before navigation.
214    pub async fn new_page(
215        &self,
216        ctx_id: Option<chaser_oxide::cdp::browser_protocol::browser::BrowserContextId>,
217        url: &str,
218    ) -> ChaserResult<(chaser_oxide::Page, ChaserPage)> {
219        let mut params = CreateTargetParams::new("about:blank");
220        if let Some(id) = ctx_id {
221            params.browser_context_id = Some(id);
222        }
223
224        let page = self
225            .browser
226            .new_page(params)
227            .await
228            .map_err(|e| ChaserError::PageFailed(e.to_string()))?;
229
230        let chaser = ChaserPage::new(page.clone());
231
232        // On macOS/Windows use native profile — Chrome version, RAM, and GPU all
233        // match the real host, which is always the most convincing fingerprint.
234        // On Linux only, override with the configured profile (default: Windows)
235        // because native Linux leaks Os::Linux into UA + Sec-CH-UA-Platform-Version.
236        #[cfg(not(target_os = "linux"))]
237        chaser
238            .apply_native_profile()
239            .await
240            .map_err(|e| ChaserError::PageFailed(format!("apply_native_profile: {e}")))?;
241
242        #[cfg(target_os = "linux")]
243        {
244            // Always use the native profile on Linux — both headless and Xvfb.
245            // Cloudflare compares the HTTP Sec-CH-UA headers with JS navigator.userAgentData;
246            // if we spoof with a Windows profile, the Chromium binary sends "Chromium" (no
247            // "Google Chrome") in Sec-CH-UA while our JS says "Google Chrome" → instant
248            // detection. Native profile keeps both consistent and matches what cf-clearance-scraper
249            // sends. The configured Profile is honoured only on macOS/Windows builds.
250            chaser
251                .apply_native_profile()
252                .await
253                .map_err(|e| ChaserError::PageFailed(format!("apply_native_profile: {e}")))?;
254
255            // NOTE: LINUX_SCREEN_PATCH and LINUX_PERMS_PATCH are intentionally NOT
256            // applied here. Both patches replace native browser APIs with JS functions,
257            // making `screen.width.toString()` and `navigator.permissions.query.toString()`
258            // return non-native code strings — a reliable bot signal that CF's managed
259            // challenge JS checks. cf-clearance-scraper does not patch these APIs and
260            // passes the same challenge. The 800×600 screen and 'denied' notification
261            // permission are acceptable — CF does not gate on these values.
262        }
263
264        if url != "about:blank" {
265            chaser
266                .goto(url)
267                .await
268                .map_err(|e| ChaserError::NavigationFailed(e.to_string()))?;
269        }
270
271        Ok((page, chaser))
272    }
273
274    pub async fn shutdown(self) {
275        self.healthy.store(false, Ordering::SeqCst);
276        #[cfg(target_os = "linux")]
277        if let Some(mut child) = self.xvfb {
278            let _ = child.kill();
279        }
280    }
281}
282
/// Pick an X display number for a new Xvfb server.
///
/// Candidates 99 through 199 are checked in ascending order and the first
/// one without a `/tmp/.X{n}-lock` file is returned. If every candidate has
/// a lock file, 199 is returned anyway.
#[cfg(target_os = "linux")]
fn find_free_display() -> u32 {
    (99u32..200)
        .find(|n| !std::path::Path::new(&format!("/tmp/.X{n}-lock")).exists())
        .unwrap_or(199)
}
293
/// Guard representing one reserved browser-context slot.
///
/// Dropping it decrements the shared active-context counter and releases the
/// semaphore permit it holds.
pub struct ContextPermit {
    /// Held only so the semaphore slot is returned when this guard is dropped.
    _permit: tokio::sync::OwnedSemaphorePermit,
    /// Counter shared with the `BrowserManager` that issued this permit.
    active_contexts: Arc<AtomicUsize>,
}
298
299impl Drop for ContextPermit {
300    fn drop(&mut self) {
301        self.active_contexts.fetch_sub(1, Ordering::SeqCst);
302    }
303}
304
#[cfg(test)]
mod tests {
    use super::normalize_chrome_flag;

    /// Assert that every `(input, expected)` pair normalizes as stated.
    fn check(cases: &[(&str, &str)]) {
        for &(raw, expected) in cases {
            assert_eq!(normalize_chrome_flag(raw), expected);
        }
    }

    #[test]
    fn normalize_strips_double_dash_keys() {
        check(&[
            ("--no-sandbox", "no-sandbox"),
            ("--disable-gpu", "disable-gpu"),
        ]);
    }

    #[test]
    fn normalize_strips_double_dash_keyvalue() {
        check(&[(
            "--disable-blink-features=AutomationControlled",
            "disable-blink-features=AutomationControlled",
        )]);
    }

    #[test]
    fn normalize_passes_through_already_clean() {
        check(&[("no-sandbox", "no-sandbox"), ("key=value", "key=value")]);
    }

    #[test]
    fn normalize_handles_single_dash_too() {
        // Some legacy chrome flags use a single dash; any number of leading
        // dashes is stripped, so all of these forms normalize identically.
        check(&[
            ("-no-sandbox", "no-sandbox"),
            ("---no-sandbox", "no-sandbox"),
        ]);
    }
}