Skip to main content

clawser_browser/
lib.rs

1//! Antidetect browser automation powered by chromiumoxide CDP.
2//!
3//! ```rust,no_run
4//! use clawser_browser::Browser;
5//!
6//! #[tokio::main]
7//! async fn main() -> clawser_browser::Result<()> {
8//!     let browser = Browser::builder()
9//!         .headful()
10//!         .profile(7, 777)
11//!         .build().await?;
12//!
13//!     let page = browser.new_page("https://example.com").await?;
14//!     let title = page.js("document.title").await?;
15//!     println!("{title}");
16//!     browser.close().await
17//! }
18//! ```
19
20mod client;
21mod profile;
22
23pub use client::{HttpClient, HttpClientBuilder};
24pub use profile::{generate_config_json, random_profile, write_config_file, HwProfile, PROFILES};
25
26// Re-export wreq types that users need
27pub use wreq;
28pub use wreq::header;
29
30use chromiumoxide::browser::{Browser as CdpBrowser, BrowserConfig};
31use chromiumoxide::cdp::browser_protocol::input::{
32    DispatchKeyEventParams, DispatchKeyEventType, DispatchMouseEventParams,
33    DispatchMouseEventType, MouseButton,
34};
35use chromiumoxide::cdp::browser_protocol::network::Cookie;
36use chromiumoxide::cdp::browser_protocol::page::{CaptureScreenshotParams, NavigateParams};
37use chromiumoxide::Page as CdpPage;
38use futures_util::StreamExt;
39use std::path::PathBuf;
40use std::time::Duration;
41use tokio::task::JoinHandle;
42
43pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
44
45// ── BrowserBuilder ──────────────────────────────────────────────
46
47pub struct BrowserBuilder {
48    headless: bool,
49    chrome_path: Option<String>,
50    profile_choice: ProfileChoice,
51    user_data_dir: Option<String>,
52    window_size: (u32, u32),
53    proxy: Option<String>,
54    extra_args: Vec<String>,
55}
56
/// Source of the clawser config that `BrowserBuilder::build` hands to Chrome.
enum ProfileChoice {
    /// No config file is passed.
    None,
    /// Profile index and seed are picked at build time.
    Random,
    /// A specific profile `index` combined with a deterministic `seed`.
    Indexed {
        index: usize,
        seed: u64,
    },
    /// Path of a config file supplied by the caller.
    ConfigFile(String),
}
63
64impl Default for BrowserBuilder {
65    fn default() -> Self {
66        Self {
67            headless: true,
68            chrome_path: None,
69            profile_choice: ProfileChoice::None,
70            user_data_dir: None,
71            window_size: (1920, 1080),
72            proxy: None,
73            extra_args: Vec::new(),
74        }
75    }
76}
77
78impl BrowserBuilder {
79    pub fn new() -> Self {
80        Self::default()
81    }
82
83    /// Run with visible window.
84    pub fn headful(mut self) -> Self {
85        self.headless = false;
86        self
87    }
88
89    /// Run in headless mode (default).
90    pub fn headless(mut self) -> Self {
91        self.headless = true;
92        self
93    }
94
95    /// Path to chrome.exe. Falls back to `CLAWSER_CHROME_PATH` env.
96    pub fn chrome_path(mut self, path: impl Into<String>) -> Self {
97        self.chrome_path = Some(path.into());
98        self
99    }
100
101    /// Use hardware profile `index` (0..99) with deterministic `seed`.
102    pub fn profile(mut self, index: usize, seed: u64) -> Self {
103        self.profile_choice = ProfileChoice::Indexed { index, seed };
104        self
105    }
106
107    /// Use a random hardware profile.
108    pub fn random(mut self) -> Self {
109        self.profile_choice = ProfileChoice::Random;
110        self
111    }
112
113    /// Use a custom clawser config JSON file path.
114    pub fn config(mut self, path: impl Into<String>) -> Self {
115        self.profile_choice = ProfileChoice::ConfigFile(path.into());
116        self
117    }
118
119    /// Persistent user data dir for cookies/storage.
120    pub fn user_data_dir(mut self, path: impl Into<String>) -> Self {
121        self.user_data_dir = Some(path.into());
122        self
123    }
124
125    /// Window size (default 1920x1080).
126    pub fn window_size(mut self, w: u32, h: u32) -> Self {
127        self.window_size = (w, h);
128        self
129    }
130
131    /// Set SOCKS5 proxy. Format: `"socks5://user:pass@host:port"`.
132    /// Also accepts `"socks5://host:port"` (no auth).
133    pub fn proxy(mut self, proxy: impl Into<String>) -> Self {
134        self.proxy = Some(proxy.into());
135        self
136    }
137
138    /// Set SOCKS5 proxy from host, port, user, pass components.
139    pub fn proxy_socks5(mut self, host: &str, port: u16, user: &str, pass: &str) -> Self {
140        self.proxy = Some(format!("socks5://{}:{}@{}:{}", user, pass, host, port));
141        self
142    }
143
144    /// Add a Chrome launch argument (e.g. `"disable-gpu"`).
145    pub fn arg(mut self, arg: impl Into<String>) -> Self {
146        self.extra_args.push(arg.into());
147        self
148    }
149
150    /// Launch the browser.
151    pub async fn build(self) -> Result<Browser> {
152        let chrome_path = self
153            .chrome_path
154            .or_else(|| std::env::var("CLAWSER_CHROME_PATH").ok())
155            .ok_or("set CLAWSER_CHROME_PATH env or call .chrome_path()")?;
156
157        let (config_path, profile_id) = match self.profile_choice {
158            ProfileChoice::Random => {
159                let (idx, seed) = profile::random_profile();
160                let p = profile::write_config_file(idx, seed)?;
161                (Some(p), Some(format!("clawser_{idx}_{seed}")))
162            }
163            ProfileChoice::Indexed { index, seed } => {
164                let p = profile::write_config_file(index, seed)?;
165                (Some(p), Some(format!("clawser_{index}_{seed}")))
166            }
167            ProfileChoice::ConfigFile(ref path) => (Some(PathBuf::from(path)), None),
168            ProfileChoice::None => (None, None),
169        };
170
171        let mut cb = BrowserConfig::builder()
172            .chrome_executable(&chrome_path)
173            .disable_default_args()
174            .no_sandbox()
175            .with_head()
176            .window_size(self.window_size.0, self.window_size.1)
177            .viewport(None);
178
179        if self.headless {
180            cb = cb.arg(("headless", "new"));
181        }
182
183        if let Some(ref cp) = config_path {
184            let p = cp.to_string_lossy().replace('/', "\\");
185            cb = cb.arg(("clawser-config", p.as_str()));
186        }
187
188        if let Some(ref udd) = self.user_data_dir {
189            cb = cb.user_data_dir(udd);
190        } else if let Some(ref id) = profile_id {
191            // Store profiles next to the chrome exe (project dir), not in %TEMP%.
192            let profiles_dir = std::path::Path::new(&chrome_path)
193                .parent()
194                .unwrap_or(std::path::Path::new("."))
195                .join("clawser_profiles")
196                .join(id);
197            cb = cb.user_data_dir(profiles_dir);
198        }
199
200        if let Some(ref proxy) = self.proxy {
201            cb = cb.arg(("proxy-server", proxy.as_str()));
202        }
203
204        cb = cb
205            .arg(("disable-blink-features", "AutomationControlled"))
206            .arg(("remote-allow-origins", "*"))
207            .arg("no-first-run")
208            .arg("no-default-browser-check");
209
210        for a in &self.extra_args {
211            cb = cb.arg(a.as_str());
212        }
213
214        let config = cb.build().map_err(|e| format!("browser config: {e}"))?;
215        let (browser, mut handler) = CdpBrowser::launch(config).await?;
216
217        let handle = tokio::spawn(async move {
218            while let Some(event) = handler.next().await {
219                if event.is_err() {
220                    break;
221                }
222            }
223        });
224
225        Ok(Browser {
226            inner: browser,
227            _handler: handle,
228            _config_path: config_path,
229        })
230    }
231}
232
233// ── Browser ─────────────────────────────────────────────────────
234
235pub struct Browser {
236    inner: CdpBrowser,
237    _handler: JoinHandle<()>,
238    _config_path: Option<PathBuf>,
239}
240
241impl Browser {
242    pub fn builder() -> BrowserBuilder {
243        BrowserBuilder::new()
244    }
245
246    /// Connect to an already-running Chrome at `ws_url`.
247    pub async fn connect(ws_url: &str) -> Result<Self> {
248        let (browser, mut handler) = CdpBrowser::connect(ws_url).await?;
249        let handle = tokio::spawn(async move {
250            while let Some(e) = handler.next().await {
251                if e.is_err() {
252                    break;
253                }
254            }
255        });
256        Ok(Self {
257            inner: browser,
258            _handler: handle,
259            _config_path: None,
260        })
261    }
262
263    /// Open a new tab and navigate to URL.
264    /// Background human simulation (mouse movement + scrolling) starts automatically.
265    pub async fn new_page(&self, url: &str) -> Result<Page> {
266        let page = self.inner.new_page(url).await?;
267        let (tx, rx) = tokio::sync::watch::channel(false);
268        let sim_page = page.clone();
269        tokio::spawn(async move {
270            human_loop(sim_page, rx, 1920.0, 1080.0).await;
271        });
272        Ok(Page { inner: page, _sim_cancel: tx })
273    }
274
275    /// Get all open pages (each gets its own background human simulation).
276    pub async fn pages(&self) -> Result<Vec<Page>> {
277        let mut result = Vec::new();
278        for p in self.inner.pages().await? {
279            let (tx, rx) = tokio::sync::watch::channel(false);
280            let sim_page = p.clone();
281            tokio::spawn(async move {
282                human_loop(sim_page, rx, 1920.0, 1080.0).await;
283            });
284            result.push(Page { inner: p, _sim_cancel: tx });
285        }
286        Ok(result)
287    }
288
289    /// Get all browser cookies.
290    pub async fn cookies(&self) -> Result<Vec<Cookie>> {
291        Ok(self.inner.get_cookies().await?)
292    }
293
294    /// Access underlying chromiumoxide Browser for raw CDP.
295    pub fn cdp(&self) -> &CdpBrowser {
296        &self.inner
297    }
298
299    /// Gracefully shut down the browser.
300    pub async fn close(mut self) -> Result<()> {
301        self.inner.close().await?;
302        let _ = self._handler.await;
303        Ok(())
304    }
305}
306
307// ── Page ────────────────────────────────────────────────────────
308
309pub struct Page {
310    inner: CdpPage,
311    _sim_cancel: tokio::sync::watch::Sender<bool>,
312}
313
314impl Page {
315    /// Navigate to URL (returns immediately, does not wait for load).
316    pub async fn navigate(&self, url: &str) -> Result<()> {
317        self.inner.execute(NavigateParams::new(url)).await?;
318        Ok(())
319    }
320
321    /// Navigate to URL and wait for page load.
322    pub async fn goto(&self, url: &str) -> Result<()> {
323        self.inner.goto(url).await?;
324        Ok(())
325    }
326
327    /// Wait for next navigation/load event.
328    pub async fn wait_for_load(&self) -> Result<()> {
329        self.inner.wait_for_navigation().await?;
330        Ok(())
331    }
332
333    /// Run JS expression in page context, return result as String.
334    pub async fn js(&self, expr: &str) -> Result<String> {
335        let result = self.inner.evaluate(expr).await?;
336        match result.value() {
337            Some(serde_json::Value::String(s)) => Ok(s.clone()),
338            Some(serde_json::Value::Null) | None => Ok(String::new()),
339            Some(v) => Ok(v.to_string()),
340        }
341    }
342
343    /// Run JS and deserialize result into `T`.
344    pub async fn js_as<T: serde::de::DeserializeOwned>(&self, expr: &str) -> Result<T> {
345        let result = self.inner.evaluate(expr).await?;
346        Ok(result.into_value()?)
347    }
348
349    /// Inject script that runs on every new document before page JS.
350    pub async fn js_on_new_document(&self, script: &str) -> Result<()> {
351        self.inner.evaluate_on_new_document(script).await?;
352        Ok(())
353    }
354
355    /// Current page URL.
356    pub async fn url(&self) -> Result<String> {
357        Ok(self.inner.url().await?.unwrap_or_default())
358    }
359
360    /// Page title.
361    pub async fn title(&self) -> Result<String> {
362        Ok(self.inner.get_title().await?.unwrap_or_default())
363    }
364
365    /// Full page HTML.
366    pub async fn html(&self) -> Result<String> {
367        Ok(self.inner.content().await?)
368    }
369
370    /// Capture PNG screenshot.
371    pub async fn screenshot(&self) -> Result<Vec<u8>> {
372        Ok(self
373            .inner
374            .screenshot(CaptureScreenshotParams::default())
375            .await?)
376    }
377
378    /// Click at (x, y) with realistic mouse movement + timing.
379    pub async fn click(&self, x: f64, y: f64) -> Result<()> {
380        self.inner
381            .execute(DispatchMouseEventParams::new(
382                DispatchMouseEventType::MouseMoved,
383                x,
384                y,
385            ))
386            .await?;
387        tokio::time::sleep(jitter(20, 60)).await;
388
389        let mut press =
390            DispatchMouseEventParams::new(DispatchMouseEventType::MousePressed, x, y);
391        press.button = Some(MouseButton::Left);
392        press.click_count = Some(1);
393        self.inner.execute(press).await?;
394        tokio::time::sleep(jitter(40, 120)).await;
395
396        let mut release =
397            DispatchMouseEventParams::new(DispatchMouseEventType::MouseReleased, x, y);
398        release.button = Some(MouseButton::Left);
399        release.click_count = Some(1);
400        self.inner.execute(release).await?;
401        Ok(())
402    }
403
404    /// Type text into focused element with realistic keystroke timing.
405    pub async fn type_text(&self, text: &str) -> Result<()> {
406        for ch in text.chars() {
407            let s = ch.to_string();
408            let mut down = DispatchKeyEventParams::new(DispatchKeyEventType::KeyDown);
409            down.text = Some(s.clone());
410            down.key = Some(s.clone());
411            self.inner.execute(down).await?;
412
413            let mut up = DispatchKeyEventParams::new(DispatchKeyEventType::KeyUp);
414            up.key = Some(s);
415            self.inner.execute(up).await?;
416
417            tokio::time::sleep(jitter(30, 130)).await;
418        }
419        Ok(())
420    }
421
422    /// Scroll by `delta_y` pixels (positive = down).
423    pub async fn scroll(&self, delta_y: f64) -> Result<()> {
424        let mut ev =
425            DispatchMouseEventParams::new(DispatchMouseEventType::MouseWheel, 400.0, 300.0);
426        ev.delta_x = Some(0.0);
427        ev.delta_y = Some(delta_y);
428        self.inner.execute(ev).await?;
429        Ok(())
430    }
431
432    /// Sleep for `ms` milliseconds.
433    pub async fn wait(&self, ms: u64) {
434        tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
435    }
436
437    /// Close the page/tab.
438    pub async fn close(self) -> Result<()> {
439        self.inner.close().await?;
440        Ok(())
441    }
442
443    /// Access underlying chromiumoxide Page for raw CDP.
444    pub fn cdp(&self) -> &CdpPage {
445        &self.inner
446    }
447}
448
// ── Human Simulation (built-in) ────────────────────────────────
//
// Every Page auto-starts a background task that generates realistic mouse
// movement + scrolling via CDP. The task lives as long as the Page: it stops
// when the Page (and with it the cancel sender) is dropped. CDP errors are
// ignored, so after the browser closes the task keeps idling until then.
// Safe: only moves the cursor and scrolls — never clicks or types.
455
456/// Core simulation loop — runs until cancelled.
457async fn human_loop(
458    page: CdpPage,
459    mut cancel: tokio::sync::watch::Receiver<bool>,
460    vw: f64,
461    vh: f64,
462) {
463    let mut rng = FastRng::new(nanos() ^ 0xDEAD_BEEF);
464    let mut mouse_x: f64 = vw / 2.0;
465    let mut mouse_y: f64 = vh / 2.0;
466
467    loop {
468        if *cancel.borrow() {
469            break;
470        }
471
472        // Pick random action weighted: 60% mouse move, 25% scroll, 15% idle
473        let roll = rng.next_range(100);
474        if roll < 60 {
475            // Mouse move along a bezier curve to a random target
476            let tx = rng.next_f64() * (vw - 40.0) + 20.0;
477            let ty = rng.next_f64() * (vh - 40.0) + 20.0;
478            if bezier_move(&page, &mut cancel, &mut rng, mouse_x, mouse_y, tx, ty)
479                .await
480                .is_err()
481            {
482                break;
483            }
484            mouse_x = tx;
485            mouse_y = ty;
486        } else if roll < 85 {
487            // Scroll: small random amount, sometimes up
488            let direction = if rng.next_range(100) < 80 { 1.0 } else { -1.0 };
489            let amount = (rng.next_range(200) as f64 + 50.0) * direction;
490            let mut ev = DispatchMouseEventParams::new(
491                DispatchMouseEventType::MouseWheel,
492                mouse_x,
493                mouse_y,
494            );
495            ev.delta_x = Some(0.0);
496            ev.delta_y = Some(amount);
497            let _ = page.execute(ev).await;
498        } else {
499            // Idle — just wait (simulates reading)
500        }
501
502        // Pause between actions: 800ms–4s
503        let delay = rng.next_range(3200) as u64 + 800;
504        tokio::select! {
505            _ = tokio::time::sleep(Duration::from_millis(delay)) => {}
506            _ = cancel.changed() => break,
507        }
508    }
509}
510
511/// Move mouse along a quadratic bezier curve from (sx,sy) to (tx,ty).
512async fn bezier_move(
513    page: &CdpPage,
514    cancel: &mut tokio::sync::watch::Receiver<bool>,
515    rng: &mut FastRng,
516    sx: f64,
517    sy: f64,
518    tx: f64,
519    ty: f64,
520) -> std::result::Result<(), ()> {
521    // Random control point (gives the curve a natural arc)
522    let cx = (sx + tx) / 2.0 + (rng.next_f64() - 0.5) * 200.0;
523    let cy = (sy + ty) / 2.0 + (rng.next_f64() - 0.5) * 200.0;
524
525    let steps = rng.next_range(10) + 8; // 8–17 intermediate points
526    for i in 1..=steps {
527        if *cancel.borrow() {
528            return Err(());
529        }
530        let t = i as f64 / steps as f64;
531        let inv = 1.0 - t;
532        // Quadratic bezier: B(t) = (1-t)²·S + 2(1-t)t·C + t²·T
533        let x = inv * inv * sx + 2.0 * inv * t * cx + t * t * tx;
534        let y = inv * inv * sy + 2.0 * inv * t * cy + t * t * ty;
535
536        let ev =
537            DispatchMouseEventParams::new(DispatchMouseEventType::MouseMoved, x, y);
538        let _ = page.execute(ev).await;
539
540        // Inter-step delay: 8–25ms (realistic mouse polling rate)
541        let step_delay = rng.next_range(17) as u64 + 8;
542        tokio::select! {
543            _ = tokio::time::sleep(Duration::from_millis(step_delay)) => {}
544            _ = cancel.changed() => return Err(()),
545        }
546    }
547    Ok(())
548}
549
550// ── Fast PRNG ──────────────────────────────────────────────────
551
/// Small xorshift-style pseudo-random generator (shift constants 13, 7, 17).
///
/// Deterministic for a given seed. Not suitable for anything security
/// related.
struct FastRng {
    state: u64,
}

impl FastRng {
    /// Creates a generator from `seed`.
    ///
    /// A zero seed is replaced by a fixed non-zero constant, because an
    /// all-zero state would only ever produce zeros.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 {
            0x1234_5678_9ABC_DEF0
        } else {
            seed
        };
        Self { state }
    }

    /// Advances the state and returns the next 64-bit value.
    fn next(&mut self) -> u64 {
        let mut s = self.state;
        s ^= s << 13;
        s ^= s >> 7;
        s ^= s << 17;
        self.state = s;
        s
    }

    /// Returns a value in `0..max`.
    ///
    /// `max == 0` describes an empty range; it returns 0 without advancing the
    /// generator instead of panicking on a remainder by zero.
    fn next_range(&mut self, max: u32) -> u32 {
        if max == 0 {
            return 0;
        }
        // The remainder is strictly below `max`, so it always fits in `u32`.
        (self.next() % u64::from(max)) as u32
    }

    /// Returns a value in `[0, 1)` built from the low 52 bits of the next output.
    fn next_f64(&mut self) -> f64 {
        (self.next() & 0x000F_FFFF_FFFF_FFFF) as f64 / (0x0010_0000_0000_0000u64 as f64)
    }
}
580
581// ── Helpers ─────────────────────────────────────────────────────
582
/// Sub-second nanosecond part of the current wall-clock time (`0..1_000_000_000`).
///
/// Used as a cheap entropy source. Returns 0 when the system clock reports a
/// time before the Unix epoch instead of panicking.
fn nanos() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |since_epoch| u64::from(since_epoch.subsec_nanos()))
}

/// Returns a pseudo-random duration in `[min_ms, max_ms)` milliseconds.
///
/// When the range is empty or inverted (`max_ms <= min_ms`) the result is
/// exactly `min_ms`, avoiding the remainder-by-zero and subtraction-overflow
/// panics such arguments would otherwise cause.
fn jitter(min_ms: u64, max_ms: u64) -> std::time::Duration {
    let span = max_ms.saturating_sub(min_ms);
    let offset = if span == 0 { 0 } else { nanos() % span };
    // `offset < span`, so `min_ms + offset < max_ms` and cannot overflow.
    std::time::Duration::from_millis(min_ms + offset)
}