orb_browse/
lib.rs

1//! # orb-browse
2//!
3//! A TUI browser widget for Rust with WebDriver automation and bot detection bypass.
4//!
5//! **orb-browse** provides a reusable browser component that can be embedded in terminal
6//! applications using ratatui, with the unique ability to bypass bot detection systems.
7//!
8//! ## Features
9//!
10//! - 🌐 **WebDriver Automation** - Full browser control via W3C WebDriver
11//! - 🥷 **Bot Detection Bypass** - Patches ChromeDriver to avoid detection
12//! - 🎨 **TUI Widget** - Embeddable ratatui widget for terminal apps
13//! - 📸 **Screenshot Capture** - Capture web pages as images
14//! - 🚀 **Auto-Setup** - Downloads and configures ChromeDriver automatically
15//!
16//! ## Quick Start
17//!
18//! ```no_run
19//! use orb_browse::OrbBrowser;
20//!
21//! #[tokio::main]
22//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
23//!     // Create a browser instance
24//!     let browser = OrbBrowser::new().await?;
25//!
26//!     // Navigate and capture screenshot
27//!     let screenshot = browser.capture("https://google.com", 1920, 1080).await?;
28//!
29//!     // Save screenshot
30//!     std::fs::write("google.png", &screenshot)?;
31//!
32//!     Ok(())
33//! }
34//! ```
35//!
36//! ## Use as TUI Widget
37//!
38//! ```no_run
39//! use orb_browse::{OrbBrowser, widget::{BrowserWidget, BrowserState}};
40//!
41//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
42//! let browser = OrbBrowser::new().await?;
43//! let mut state = BrowserState::new();
44//!
45//! // Navigate to a URL
46//! state.navigate(&browser, "https://example.com".to_string()).await?;
47//!
48//! // In your ratatui render loop:
49//! // frame.render_widget(BrowserWidget::new(&state), area);
50//! # Ok(())
51//! # }
52//! ```
53//!
54//! ## How It Works
55//!
56//! 1. **Binary Patching**: Removes `$cdc_` markers from ChromeDriver
57//! 2. **Automation Bypass Flags**: Disables `--enable-automation` and automation hints
58//! 3. **JavaScript Injection**: Overrides `navigator.webdriver` and mocks APIs
//! 4. **WebDriver Protocol**: Uses W3C WebDriver instead of the detectable Chrome DevTools Protocol (CDP)
60//!
61//! This achieves functional parity with Python's `undetected-chromedriver`.
62
63pub mod patcher;
64pub mod injections;
65
66#[cfg(feature = "webdriver")]
67pub mod client;
68
69#[cfg(feature = "tui")]
70pub mod widget;
71
72// Re-exports for convenience
73pub use patcher::ChromeDriverPatcher;
74pub use injections::COMPREHENSIVE_BOOTSTRAP;
75
76#[cfg(feature = "webdriver")]
77pub use client::OrbBrowser;
78
79use color_eyre::Result;
80use std::path::PathBuf;
81use std::process::{Command, Child};
82use std::fs;
83use std::net::TcpListener;
84
85/// Get the installed Chrome/Chromium version
86fn get_chrome_version() -> Result<String> {
87    let chrome_path = find_chrome()
88        .ok_or_else(|| color_eyre::eyre::eyre!("Chrome/Chromium not found on system"))?;
89
90    let output = Command::new(&chrome_path)
91        .arg("--version")
92        .output()
93        .map_err(|e| color_eyre::eyre::eyre!("Failed to get Chrome version: {}", e))?;
94
95    let version_str = String::from_utf8(output.stdout)
96        .map_err(|e| color_eyre::eyre::eyre!("Failed to parse Chrome version: {}", e))?;
97
98    // Parse version string like "Chromium 143.0.7499.192" or "Google Chrome 131.0.6778.204"
99    let version = version_str
100        .split_whitespace()
101        .nth(1)
102        .and_then(|v| v.split('.').next())
103        .ok_or_else(|| color_eyre::eyre::eyre!("Failed to parse Chrome version from: {}", version_str.trim()))?;
104
105    Ok(version.to_string())
106}
107
108/// Get the full ChromeDriver version string for a given Chrome major version
109fn get_chromedriver_version(chrome_major: &str) -> Result<String> {
110    // Query the Chrome for Testing JSON endpoint to get the latest stable version
111    let url = format!(
112        "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json"
113    );
114
115    let output = Command::new("curl")
116        .arg("-sL")
117        .arg(&url)
118        .output()
119        .map_err(|e| color_eyre::eyre::eyre!("Failed to fetch ChromeDriver version info: {}", e))?;
120
121    if !output.status.success() {
122        return Err(color_eyre::eyre::eyre!("Failed to fetch ChromeDriver version info"));
123    }
124
125    let json_str = String::from_utf8(output.stdout)
126        .map_err(|e| color_eyre::eyre::eyre!("Failed to parse version JSON: {}", e))?;
127
128    // Parse JSON to find the version for this milestone
129    let json: serde_json::Value = serde_json::from_str(&json_str)
130        .map_err(|e| color_eyre::eyre::eyre!("Failed to parse version JSON: {}", e))?;
131
132    let version = json
133        .get("milestones")
134        .and_then(|m| m.get(chrome_major))
135        .and_then(|v| v.get("version"))
136        .and_then(|v| v.as_str())
137        .ok_or_else(|| color_eyre::eyre::eyre!(
138            "ChromeDriver version {} not found. Your Chrome version may be too new or too old. \
139             Visit https://googlechromelabs.github.io/chrome-for-testing/ for available versions.",
140            chrome_major
141        ))?;
142
143    Ok(version.to_string())
144}
145
146/// Download ChromeDriver for the current system
147fn download_chromedriver() -> Result<PathBuf> {
148    let cache_dir = dirs::cache_dir()
149        .unwrap_or_else(|| PathBuf::from("/tmp"))
150        .join("orb-browse/drivers");
151
152    fs::create_dir_all(&cache_dir)?;
153
154    // Detect installed Chrome version
155    let chrome_major = get_chrome_version()?;
156
157    // Get matching ChromeDriver version
158    let driver_version = get_chromedriver_version(&chrome_major)?;
159
160    // Version-specific driver path to cache different versions
161    let driver_path = cache_dir.join(format!("chromedriver-{}", chrome_major));
162
163    // If already exists for this version, return it
164    if driver_path.exists() {
165        return Ok(driver_path);
166    }
167
168    // Determine ChromeDriver download URL based on platform
169    let (os, arch) = if cfg!(target_os = "linux") {
170        if cfg!(target_arch = "x86_64") {
171            ("linux64", "chromedriver-linux64")
172        } else {
173            return Err(color_eyre::eyre::eyre!("Unsupported architecture"));
174        }
175    } else if cfg!(target_os = "macos") {
176        if cfg!(target_arch = "aarch64") {
177            ("mac-arm64", "chromedriver-mac-arm64")
178        } else {
179            ("mac-x64", "chromedriver-mac-x64")
180        }
181    } else {
182        return Err(color_eyre::eyre::eyre!("Unsupported OS"));
183    };
184
185    // Download matching ChromeDriver version
186    let url = format!(
187        "https://storage.googleapis.com/chrome-for-testing-public/{}/{}/chromedriver-{}.zip",
188        driver_version, os, os
189    );
190
191    // Download with curl
192    let zip_path = cache_dir.join(format!("chromedriver-{}.zip", chrome_major));
193    let status = Command::new("curl")
194        .arg("-L")
195        .arg("-o")
196        .arg(&zip_path)
197        .arg(&url)
198        .stdout(std::process::Stdio::null())
199        .stderr(std::process::Stdio::null())
200        .status()?;
201
202    if !status.success() {
203        return Err(color_eyre::eyre::eyre!(
204            "Failed to download ChromeDriver {} for Chrome {}. \
205             Check your internet connection or visit https://googlechromelabs.github.io/chrome-for-testing/",
206            driver_version, chrome_major
207        ));
208    }
209
210    // Extract with unzip
211    let status = Command::new("unzip")
212        .arg("-o")
213        .arg(&zip_path)
214        .arg("-d")
215        .arg(&cache_dir)
216        .stdout(std::process::Stdio::null())
217        .stderr(std::process::Stdio::null())
218        .status()?;
219
220    if !status.success() {
221        return Err(color_eyre::eyre::eyre!("Failed to extract ChromeDriver"));
222    }
223
224    // Move the binary to the expected location
225    let extracted_path = cache_dir.join(arch).join("chromedriver");
226    fs::rename(&extracted_path, &driver_path)?;
227
228    // Make executable
229    #[cfg(unix)]
230    {
231        use std::os::unix::fs::PermissionsExt;
232        let mut perms = fs::metadata(&driver_path)?.permissions();
233        perms.set_mode(0o755);
234        fs::set_permissions(&driver_path, perms)?;
235    }
236
237    // Clean up
238    let _ = fs::remove_file(&zip_path);
239    let _ = fs::remove_dir_all(cache_dir.join(arch));
240
241    // Clean up old cached drivers for different Chrome versions
242    if let Ok(entries) = fs::read_dir(&cache_dir) {
243        for entry in entries.flatten() {
244            let path = entry.path();
245            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
246                // Remove old versioned drivers and the legacy "chromedriver" symlink
247                if (name.starts_with("chromedriver-") && name != format!("chromedriver-{}", chrome_major))
248                    || name == "chromedriver" {
249                    let _ = fs::remove_file(&path);
250                }
251            }
252        }
253    }
254
255    // Create a symlink for backward compatibility
256    #[cfg(unix)]
257    {
258        let symlink_path = cache_dir.join("chromedriver");
259        let _ = fs::remove_file(&symlink_path); // Remove if exists
260        let _ = std::os::unix::fs::symlink(&driver_path, &symlink_path);
261    }
262
263    Ok(driver_path)
264}
265
/// Ask the operating system for a currently unused local TCP port.
///
/// Binds a listener to `127.0.0.1` with port `0` and reports the port that was
/// actually assigned. The listener is dropped before returning, so the port is
/// free again for ChromeDriver to bind.
fn find_available_port() -> Result<u16> {
    let assigned_addr = TcpListener::bind("127.0.0.1:0")?.local_addr()?;
    Ok(assigned_addr.port())
}
272
273/// Launch patched ChromeDriver and return the WebDriver URL and process handle
274///
275/// This function:
276/// 1. Downloads ChromeDriver if not present
277/// 2. Patches it to remove $cdc_ markers
278/// 3. Launches it on a random available port
279/// 4. Returns the WebDriver endpoint URL and process handle
280///
281/// ## Example
282///
283/// ```no_run
284/// use orb_browse::launch_patched_chromedriver;
285///
286/// # fn main() -> color_eyre::Result<()> {
287/// let (webdriver_url, mut process) = launch_patched_chromedriver()?;
288/// println!("ChromeDriver running at: {}", webdriver_url);
289///
290/// // ... use the WebDriver ...
291///
292/// // Clean up
293/// process.kill()?;
294/// # Ok(())
295/// # }
296/// ```
297pub fn launch_patched_chromedriver() -> Result<(String, Child)> {
298    // Download ChromeDriver
299    let original_driver = download_chromedriver()?;
300
301    // Patch it
302    let patcher = ChromeDriverPatcher::new();
303    let patched_driver = patcher.patch_driver(&original_driver)?;
304
305    // Find available port
306    let port = find_available_port()?;
307
308    // Check if verbose mode is enabled via environment variable
309    let verbose = std::env::var("ORB_BROWSE_VERBOSE").is_ok();
310
311    // Launch ChromeDriver with optional verbose output
312    let mut cmd = Command::new(&patched_driver);
313    cmd.arg(format!("--port={}", port));
314
315    if verbose {
316        eprintln!("[orb-browse] Launching ChromeDriver on port {} with verbose output", port);
317        cmd.arg("--verbose");
318    } else {
319        cmd.arg("--silent")
320            .arg("--log-level=OFF")
321            .stdout(std::process::Stdio::null())
322            .stderr(std::process::Stdio::null());
323    }
324
325    let mut child = cmd
326        .spawn()
327        .map_err(|e| color_eyre::eyre::eyre!("Failed to launch ChromeDriver: {}", e))?;
328
329    let webdriver_url = format!("http://localhost:{}", port);
330
331    // Wait for ChromeDriver to become responsive (up to 3 seconds)
332    let mut attempts = 0;
333    let max_attempts = 30;
334    loop {
335        std::thread::sleep(std::time::Duration::from_millis(100));
336        attempts += 1;
337
338        // Check if ChromeDriver process is still alive
339        match child.try_wait() {
340            Ok(Some(status)) => {
341                return Err(color_eyre::eyre::eyre!(
342                    "ChromeDriver exited immediately with status: {}.\n\
343                     This is often caused by:\n\
344                     1. Missing dependencies (libnss3, libx11, etc.)\n\
345                     2. Chrome/ChromeDriver version mismatch\n\
346                     3. Chrome binary not executable\n\
347                     \n\
348                     Run with ORB_BROWSE_VERBOSE=1 to see detailed error output:\n\
349                     ORB_BROWSE_VERBOSE=1 cargo run",
350                    status
351                ));
352            }
353            Ok(None) => {
354                // Still running, try to connect
355                if std::net::TcpStream::connect(format!("127.0.0.1:{}", port)).is_ok() {
356                    if verbose {
357                        eprintln!("[orb-browse] ChromeDriver ready on port {}", port);
358                    }
359                    return Ok((webdriver_url, child));
360                }
361            }
362            Err(e) => {
363                return Err(color_eyre::eyre::eyre!("Failed to check ChromeDriver status: {}", e));
364            }
365        }
366
367        if attempts >= max_attempts {
368            let _ = child.kill();
369            return Err(color_eyre::eyre::eyre!(
370                "ChromeDriver failed to become responsive after 3 seconds.\n\
371                 The process is running but not accepting connections.\n\
372                 Run with ORB_BROWSE_VERBOSE=1 to see detailed output:\n\
373                 ORB_BROWSE_VERBOSE=1 cargo run"
374            ));
375        }
376    }
377}
378
/// Locate a Chrome/Chromium executable on this machine.
///
/// The `ORB_BROWSE_CHROME_PATH` environment variable is consulted first; if it
/// names an existing path, that path is returned. If it is set but the path
/// does not exist, a warning is printed to stderr and the search continues.
/// ```bash
/// export ORB_BROWSE_CHROME_PATH=/usr/bin/chromium-browser
/// ```
///
/// Otherwise a fixed list of common Linux and macOS install locations is
/// probed in order and the first one that exists is returned. `None` means
/// nothing was found.
pub fn find_chrome() -> Option<PathBuf> {
    // Common installation paths, probed in this order.
    const KNOWN_LOCATIONS: [&str; 8] = [
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/bin/chrome",
        "/snap/bin/chromium",
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
    ];

    // An explicit override takes priority over the built-in list.
    if let Ok(configured) = std::env::var("ORB_BROWSE_CHROME_PATH") {
        let configured = PathBuf::from(configured);
        if configured.exists() {
            return Some(configured);
        }
        eprintln!("Warning: ORB_BROWSE_CHROME_PATH points to non-existent file: {}", configured.display());
    }

    KNOWN_LOCATIONS
        .iter()
        .map(PathBuf::from)
        .find(|location| location.exists())
}