orb_browse/
client.rs

1//! WebDriver client with bot detection bypass
2
3use crate::{find_chrome, injections::COMPREHENSIVE_BOOTSTRAP, launch_patched_chromedriver};
4use color_eyre::Result;
5use fantoccini::ClientBuilder;
6use serde_json::json;
7use std::process::Child;
8use base64::Engine;
9
10/// A browser client that bypasses bot detection
11///
12/// This wraps fantoccini's WebDriver client with:
13/// - Patched ChromeDriver (no $cdc_ markers)
14/// - Automation bypass flags
15/// - JavaScript injection to hide automation markers
16///
17/// ## Example
18///
19/// ```no_run
20/// use orb_browse::OrbBrowser;
21///
22/// #[tokio::main]
23/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
24///     let browser = OrbBrowser::new().await?;
25///     let screenshot = browser.capture("https://google.com", 1920, 1080).await?;
26///     std::fs::write("google.png", &screenshot)?;
27///     Ok(())
28/// }
29/// ```
30pub struct OrbBrowser {
31    client: fantoccini::Client,
32    _chromedriver: Child,
33}
34
35impl OrbBrowser {
36    /// Create a new browser instance
37    ///
38    /// This will:
39    /// 1. Download and patch ChromeDriver (if not already cached)
40    /// 2. Launch ChromeDriver on a random port
41    /// 3. Connect a WebDriver client with bot detection bypass
42    pub async fn new() -> Result<Self> {
43        Self::with_size(1920, 1080).await
44    }
45
46    /// Create a new browser with custom window size
47    pub async fn with_size(width: u32, height: u32) -> Result<Self> {
48        let chrome_path = find_chrome()
49            .ok_or_else(|| color_eyre::eyre::eyre!("Chrome/Chromium not found"))?;
50
51        // Launch patched ChromeDriver
52        let (webdriver_url, chromedriver_process) = launch_patched_chromedriver()?;
53
54        // Build Chrome capabilities with automation bypass flags
55        let mut caps = serde_json::Map::new();
56        caps.insert("browserName".to_string(), json!("chrome"));
57
58        let mut chrome_options = serde_json::Map::new();
59        chrome_options.insert("binary".to_string(), json!(chrome_path.to_str().unwrap()));
60
61        let window_size_arg = format!("--window-size={},{}", width, height);
62        let args = vec![
63            "--headless",
64            "--remote-debugging-pipe",
65            "--disable-blink-features=AutomationControlled",
66            "--disable-web-security",
67            "--disable-dev-shm-usage",
68            "--no-first-run",
69            "--disable-infobars",
70            "--disable-extensions",
71            "--disable-gpu",
72            "--no-sandbox",
73            "--disable-setuid-sandbox",
74            window_size_arg.as_str(),
75            "--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
76        ];
77        chrome_options.insert("args".to_string(), json!(args));
78
79        // Exclude automation switches
80        let excluded_switches = vec!["enable-automation", "enable-logging"];
81        chrome_options.insert("excludeSwitches".to_string(), json!(excluded_switches));
82
83        // Add prefs to hide automation
84        let prefs = json!({
85            "credentials_enable_service": false,
86            "profile.password_manager_enabled": false,
87        });
88        chrome_options.insert("prefs".to_string(), prefs);
89
90        caps.insert("goog:chromeOptions".to_string(), json!(chrome_options));
91
92        // Connect to ChromeDriver
93        let client = ClientBuilder::native()
94            .capabilities(caps)
95            .connect(&webdriver_url)
96            .await
97            .map_err(|e| color_eyre::eyre::eyre!("Failed to connect to ChromeDriver: {}", e))?;
98
99        Ok(Self {
100            client,
101            _chromedriver: chromedriver_process,
102        })
103    }
104
105    /// Navigate to a URL
106    pub async fn goto(&self, url: &str) -> Result<()> {
107        self.client
108            .goto(url)
109            .await
110            .map_err(|e| color_eyre::eyre::eyre!("Failed to navigate: {}", e))?;
111
112        // Inject bypass script after navigation
113        if !url.starts_with("file://") {
114            let _ = self
115                .client
116                .execute(COMPREHENSIVE_BOOTSTRAP, vec![])
117                .await;
118        }
119
120        Ok(())
121    }
122
123    /// Capture a screenshot of a URL
124    ///
125    /// Returns PNG bytes
126    pub async fn capture(&self, url: &str, _width: u32, _height: u32) -> Result<Vec<u8>> {
127        // Inject bypass script before navigation
128        if !url.starts_with("file://") {
129            let _ = self
130                .client
131                .execute(COMPREHENSIVE_BOOTSTRAP, vec![])
132                .await;
133        }
134
135        self.goto(url).await?;
136
137        // Wait for page load
138        tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
139
140        // Inject again after page load (some sites re-check)
141        if !url.starts_with("file://") {
142            let _ = self
143                .client
144                .execute(COMPREHENSIVE_BOOTSTRAP, vec![])
145                .await;
146        }
147
148        // Capture screenshot
149        let screenshot_b64 = self
150            .client
151            .screenshot()
152            .await
153            .map_err(|e| color_eyre::eyre::eyre!("Failed to capture screenshot: {}", e))?;
154
155        // Decode base64
156        let screenshot_bytes = base64::prelude::BASE64_STANDARD
157            .decode(&screenshot_b64)
158            .map_err(|e| color_eyre::eyre::eyre!("Failed to decode screenshot: {}", e))?;
159
160        Ok(screenshot_bytes)
161    }
162
163    /// Get the underlying fantoccini client for advanced usage
164    pub fn client(&self) -> &fantoccini::Client {
165        &self.client
166    }
167
168    /// Close the browser
169    pub async fn close(mut self) -> Result<()> {
170        let _ = self.client.close().await;
171        let _ = self._chromedriver.kill();
172        Ok(())
173    }
174}