orb_browse/
lib.rs

1//! # orb-browse
2//!
3//! A TUI browser widget for Rust with WebDriver automation and bot detection bypass.
4//!
5//! **orb-browse** provides a reusable browser component that can be embedded in terminal
6//! applications using ratatui, with the unique ability to bypass bot detection systems.
7//!
8//! ## Features
9//!
10//! - 🌐 **WebDriver Automation** - Full browser control via W3C WebDriver
11//! - 🥷 **Bot Detection Bypass** - Patches ChromeDriver to avoid detection
12//! - 🎨 **TUI Widget** - Embeddable ratatui widget for terminal apps
13//! - 📸 **Screenshot Capture** - Capture web pages as images
14//! - 🚀 **Auto-Setup** - Downloads and configures ChromeDriver automatically
15//!
16//! ## Quick Start
17//!
18//! ```no_run
19//! use orb_browse::OrbBrowser;
20//!
21//! #[tokio::main]
22//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
23//!     // Create a browser instance
24//!     let browser = OrbBrowser::new().await?;
25//!
26//!     // Navigate and capture screenshot
27//!     let screenshot = browser.capture("https://google.com", 1920, 1080).await?;
28//!
29//!     // Save screenshot
30//!     std::fs::write("google.png", &screenshot)?;
31//!
32//!     Ok(())
33//! }
34//! ```
35//!
36//! ## Use as TUI Widget
37//!
38//! ```no_run
39//! use orb_browse::{OrbBrowser, widget::{BrowserWidget, BrowserState}};
40//!
41//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
42//! let browser = OrbBrowser::new().await?;
43//! let mut state = BrowserState::new();
44//!
45//! // Navigate to a URL
46//! state.navigate(&browser, "https://example.com".to_string()).await?;
47//!
48//! // In your ratatui render loop:
49//! // frame.render_widget(BrowserWidget::new(&state), area);
50//! # Ok(())
51//! # }
52//! ```
53//!
54//! ## How It Works
55//!
56//! 1. **Binary Patching**: Removes `$cdc_` markers from ChromeDriver
57//! 2. **Automation Bypass Flags**: Disables `--enable-automation` and automation hints
58//! 3. **JavaScript Injection**: Overrides `navigator.webdriver` and mocks APIs
//! 4. **WebDriver Protocol**: Uses W3C WebDriver rather than CDP (Chrome DevTools Protocol), which is detectable
60//!
61//! This achieves functional parity with Python's `undetected-chromedriver`.
62
63pub mod patcher;
64pub mod injections;
65
66#[cfg(feature = "webdriver")]
67pub mod client;
68
69#[cfg(feature = "tui")]
70pub mod widget;
71
72// Re-exports for convenience
73pub use patcher::ChromeDriverPatcher;
74pub use injections::COMPREHENSIVE_BOOTSTRAP;
75
76#[cfg(feature = "webdriver")]
77pub use client::OrbBrowser;
78
79use color_eyre::Result;
80use std::path::PathBuf;
81use std::process::{Command, Child};
82use std::fs;
83use std::net::TcpListener;
84
85/// Check if Chrome installation will work with ChromeDriver
86fn validate_chrome_installation(chrome_path: &PathBuf) -> Result<()> {
87    let path_str = chrome_path.to_string_lossy();
88
89    if path_str.contains("/snap/") {
90        return Err(color_eyre::eyre::eyre!(
91            "Snap Chromium detected at: {}\n\
92             \n\
93             Snap-packaged Chromium cannot be controlled by ChromeDriver due to snap sandboxing.\n\
94             \n\
95             Solutions:\n\
96             1. Install Chrome/Chromium via apt (recommended):\n\
97                sudo snap remove chromium\n\
98                sudo apt update\n\
99                sudo apt install chromium-browser\n\
100             \n\
101             2. Or install Google Chrome:\n\
102                wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb\n\
103                sudo apt install ./google-chrome-stable_current_amd64.deb\n\
104             \n\
105             3. Set ORB_BROWSE_CHROME_PATH to a non-snap Chrome:\n\
106                export ORB_BROWSE_CHROME_PATH=/usr/bin/chromium-browser",
107            path_str
108        ));
109    }
110
111    Ok(())
112}
113
114/// Get the installed Chrome/Chromium version
115fn get_chrome_version() -> Result<String> {
116    let chrome_path = find_chrome()
117        .ok_or_else(|| color_eyre::eyre::eyre!("Chrome/Chromium not found on system"))?;
118
119    // Validate that this Chrome installation will work
120    validate_chrome_installation(&chrome_path)?;
121
122    let output = Command::new(&chrome_path)
123        .arg("--version")
124        .output()
125        .map_err(|e| color_eyre::eyre::eyre!("Failed to get Chrome version: {}", e))?;
126
127    let version_str = String::from_utf8(output.stdout)
128        .map_err(|e| color_eyre::eyre::eyre!("Failed to parse Chrome version: {}", e))?;
129
130    // Parse version string like "Chromium 143.0.7499.192" or "Google Chrome 131.0.6778.204"
131    let version = version_str
132        .split_whitespace()
133        .nth(1)
134        .and_then(|v| v.split('.').next())
135        .ok_or_else(|| color_eyre::eyre::eyre!("Failed to parse Chrome version from: {}", version_str.trim()))?;
136
137    Ok(version.to_string())
138}
139
140/// Get the full ChromeDriver version string for a given Chrome major version
141fn get_chromedriver_version(chrome_major: &str) -> Result<String> {
142    // Query the Chrome for Testing JSON endpoint to get the latest stable version
143    let url = format!(
144        "https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json"
145    );
146
147    let output = Command::new("curl")
148        .arg("-sL")
149        .arg(&url)
150        .output()
151        .map_err(|e| color_eyre::eyre::eyre!("Failed to fetch ChromeDriver version info: {}", e))?;
152
153    if !output.status.success() {
154        return Err(color_eyre::eyre::eyre!("Failed to fetch ChromeDriver version info"));
155    }
156
157    let json_str = String::from_utf8(output.stdout)
158        .map_err(|e| color_eyre::eyre::eyre!("Failed to parse version JSON: {}", e))?;
159
160    // Parse JSON to find the version for this milestone
161    let json: serde_json::Value = serde_json::from_str(&json_str)
162        .map_err(|e| color_eyre::eyre::eyre!("Failed to parse version JSON: {}", e))?;
163
164    let version = json
165        .get("milestones")
166        .and_then(|m| m.get(chrome_major))
167        .and_then(|v| v.get("version"))
168        .and_then(|v| v.as_str())
169        .ok_or_else(|| color_eyre::eyre::eyre!(
170            "ChromeDriver version {} not found. Your Chrome version may be too new or too old. \
171             Visit https://googlechromelabs.github.io/chrome-for-testing/ for available versions.",
172            chrome_major
173        ))?;
174
175    Ok(version.to_string())
176}
177
178/// Download ChromeDriver for the current system
179fn download_chromedriver() -> Result<PathBuf> {
180    let cache_dir = dirs::cache_dir()
181        .unwrap_or_else(|| PathBuf::from("/tmp"))
182        .join("orb-browse/drivers");
183
184    fs::create_dir_all(&cache_dir)?;
185
186    // Detect installed Chrome version
187    let chrome_major = get_chrome_version()?;
188
189    // Get matching ChromeDriver version
190    let driver_version = get_chromedriver_version(&chrome_major)?;
191
192    // Version-specific driver path to cache different versions
193    let driver_path = cache_dir.join(format!("chromedriver-{}", chrome_major));
194
195    // If already exists for this version, return it
196    if driver_path.exists() {
197        return Ok(driver_path);
198    }
199
200    // Determine ChromeDriver download URL based on platform
201    let (os, arch) = if cfg!(target_os = "linux") {
202        if cfg!(target_arch = "x86_64") {
203            ("linux64", "chromedriver-linux64")
204        } else {
205            return Err(color_eyre::eyre::eyre!("Unsupported architecture"));
206        }
207    } else if cfg!(target_os = "macos") {
208        if cfg!(target_arch = "aarch64") {
209            ("mac-arm64", "chromedriver-mac-arm64")
210        } else {
211            ("mac-x64", "chromedriver-mac-x64")
212        }
213    } else {
214        return Err(color_eyre::eyre::eyre!("Unsupported OS"));
215    };
216
217    // Download matching ChromeDriver version
218    let url = format!(
219        "https://storage.googleapis.com/chrome-for-testing-public/{}/{}/chromedriver-{}.zip",
220        driver_version, os, os
221    );
222
223    // Download with curl
224    let zip_path = cache_dir.join(format!("chromedriver-{}.zip", chrome_major));
225    let status = Command::new("curl")
226        .arg("-L")
227        .arg("-o")
228        .arg(&zip_path)
229        .arg(&url)
230        .stdout(std::process::Stdio::null())
231        .stderr(std::process::Stdio::null())
232        .status()?;
233
234    if !status.success() {
235        return Err(color_eyre::eyre::eyre!(
236            "Failed to download ChromeDriver {} for Chrome {}. \
237             Check your internet connection or visit https://googlechromelabs.github.io/chrome-for-testing/",
238            driver_version, chrome_major
239        ));
240    }
241
242    // Extract with unzip
243    let status = Command::new("unzip")
244        .arg("-o")
245        .arg(&zip_path)
246        .arg("-d")
247        .arg(&cache_dir)
248        .stdout(std::process::Stdio::null())
249        .stderr(std::process::Stdio::null())
250        .status()?;
251
252    if !status.success() {
253        return Err(color_eyre::eyre::eyre!("Failed to extract ChromeDriver"));
254    }
255
256    // Move the binary to the expected location
257    let extracted_path = cache_dir.join(arch).join("chromedriver");
258    fs::rename(&extracted_path, &driver_path)?;
259
260    // Make executable
261    #[cfg(unix)]
262    {
263        use std::os::unix::fs::PermissionsExt;
264        let mut perms = fs::metadata(&driver_path)?.permissions();
265        perms.set_mode(0o755);
266        fs::set_permissions(&driver_path, perms)?;
267    }
268
269    // Clean up
270    let _ = fs::remove_file(&zip_path);
271    let _ = fs::remove_dir_all(cache_dir.join(arch));
272
273    // Clean up old cached drivers for different Chrome versions
274    if let Ok(entries) = fs::read_dir(&cache_dir) {
275        for entry in entries.flatten() {
276            let path = entry.path();
277            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
278                // Remove old versioned drivers and the legacy "chromedriver" symlink
279                if (name.starts_with("chromedriver-") && name != format!("chromedriver-{}", chrome_major))
280                    || name == "chromedriver" {
281                    let _ = fs::remove_file(&path);
282                }
283            }
284        }
285    }
286
287    // Create a symlink for backward compatibility
288    #[cfg(unix)]
289    {
290        let symlink_path = cache_dir.join("chromedriver");
291        let _ = fs::remove_file(&symlink_path); // Remove if exists
292        let _ = std::os::unix::fs::symlink(&driver_path, &symlink_path);
293    }
294
295    Ok(driver_path)
296}
297
/// Ask the operating system for a currently unused local TCP port.
///
/// Binds a listener to `127.0.0.1:0`, reads back the port that was assigned,
/// and returns it. The listener is not kept, so the port is released again
/// before the caller can use it.
fn find_available_port() -> Result<u16> {
    let assigned_addr = TcpListener::bind("127.0.0.1:0")
        .and_then(|probe| probe.local_addr())?;
    Ok(assigned_addr.port())
}
304
305/// Launch patched ChromeDriver and return the WebDriver URL and process handle
306///
307/// This function:
308/// 1. Downloads ChromeDriver if not present
309/// 2. Patches it to remove $cdc_ markers
310/// 3. Launches it on a random available port
311/// 4. Returns the WebDriver endpoint URL and process handle
312///
313/// ## Example
314///
315/// ```no_run
316/// use orb_browse::launch_patched_chromedriver;
317///
318/// # fn main() -> color_eyre::Result<()> {
319/// let (webdriver_url, mut process) = launch_patched_chromedriver()?;
320/// println!("ChromeDriver running at: {}", webdriver_url);
321///
322/// // ... use the WebDriver ...
323///
324/// // Clean up
325/// process.kill()?;
326/// # Ok(())
327/// # }
328/// ```
329pub fn launch_patched_chromedriver() -> Result<(String, Child)> {
330    // Download ChromeDriver
331    let original_driver = download_chromedriver()?;
332
333    // Patch it
334    let patcher = ChromeDriverPatcher::new();
335    let patched_driver = patcher.patch_driver(&original_driver)?;
336
337    // Find available port
338    let port = find_available_port()?;
339
340    // Check if verbose mode is enabled via environment variable
341    let verbose = std::env::var("ORB_BROWSE_VERBOSE").is_ok();
342
343    // Launch ChromeDriver with optional verbose output
344    let mut cmd = Command::new(&patched_driver);
345    cmd.arg(format!("--port={}", port));
346
347    if verbose {
348        eprintln!("[orb-browse] Launching ChromeDriver on port {} with verbose output", port);
349        cmd.arg("--verbose");
350    } else {
351        cmd.arg("--silent")
352            .arg("--log-level=OFF")
353            .stdout(std::process::Stdio::null())
354            .stderr(std::process::Stdio::null());
355    }
356
357    let mut child = cmd
358        .spawn()
359        .map_err(|e| color_eyre::eyre::eyre!("Failed to launch ChromeDriver: {}", e))?;
360
361    let webdriver_url = format!("http://localhost:{}", port);
362
363    // Wait for ChromeDriver to become responsive (up to 3 seconds)
364    let mut attempts = 0;
365    let max_attempts = 30;
366    loop {
367        std::thread::sleep(std::time::Duration::from_millis(100));
368        attempts += 1;
369
370        // Check if ChromeDriver process is still alive
371        match child.try_wait() {
372            Ok(Some(status)) => {
373                return Err(color_eyre::eyre::eyre!(
374                    "ChromeDriver exited immediately with status: {}.\n\
375                     This is often caused by:\n\
376                     1. Snap Chromium sandboxing (use apt-installed Chrome instead)\n\
377                     2. Missing dependencies\n\
378                     3. Chrome/ChromeDriver version mismatch\n\
379                     \n\
380                     Run with ORB_BROWSE_VERBOSE=1 to see detailed error output:\n\
381                     ORB_BROWSE_VERBOSE=1 cargo run",
382                    status
383                ));
384            }
385            Ok(None) => {
386                // Still running, try to connect
387                if std::net::TcpStream::connect(format!("127.0.0.1:{}", port)).is_ok() {
388                    if verbose {
389                        eprintln!("[orb-browse] ChromeDriver ready on port {}", port);
390                    }
391                    return Ok((webdriver_url, child));
392                }
393            }
394            Err(e) => {
395                return Err(color_eyre::eyre::eyre!("Failed to check ChromeDriver status: {}", e));
396            }
397        }
398
399        if attempts >= max_attempts {
400            let _ = child.kill();
401            return Err(color_eyre::eyre::eyre!(
402                "ChromeDriver failed to become responsive after 3 seconds.\n\
403                 The process is running but not accepting connections.\n\
404                 Run with ORB_BROWSE_VERBOSE=1 to see detailed output:\n\
405                 ORB_BROWSE_VERBOSE=1 cargo run"
406            ));
407        }
408    }
409}
410
/// Locate a Chrome/Chromium executable on this machine.
///
/// Lookup order:
///
/// 1. The path in the `ORB_BROWSE_CHROME_PATH` environment variable, if it is
///    set and the path exists. If it is set but the path does not exist, a
///    warning is printed to stderr and the search continues.
/// 2. A fixed list of well-known install locations, checked in order. Non-snap
///    locations come first because snap-confined Chromium cannot be launched
///    by ChromeDriver due to snap sandboxing.
///
/// Returns `None` when nothing is found.
///
/// To override detection:
/// ```bash
/// export ORB_BROWSE_CHROME_PATH=/usr/bin/chromium-browser
/// ```
pub fn find_chrome() -> Option<PathBuf> {
    // Well-known install locations, highest priority first.
    const KNOWN_LOCATIONS: [&str; 8] = [
        // Prefer non-snap installations first
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/usr/bin/chrome",
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        // Snap version last (will be rejected by validation)
        "/snap/bin/chromium",
    ];

    // An explicit override wins, but only when it points at something real.
    match std::env::var("ORB_BROWSE_CHROME_PATH").map(PathBuf::from) {
        Ok(override_path) if override_path.exists() => return Some(override_path),
        Ok(override_path) => eprintln!(
            "Warning: ORB_BROWSE_CHROME_PATH points to non-existent file: {}",
            override_path.display()
        ),
        Err(_) => {}
    }

    KNOWN_LOCATIONS
        .iter()
        .copied()
        .map(PathBuf::from)
        .find(|location| location.exists())
}