// clawser_browser/lib.rs

//! Antidetect browser automation powered by chromiumoxide CDP.
//!
//! ```rust,no_run
//! use clawser_browser::Browser;
//!
//! #[tokio::main]
//! async fn main() -> clawser_browser::Result<()> {
//!     let browser = Browser::builder()
//!         .headful()
//!         .profile(7, 777)
//!         .build().await?;
//!
//!     let page = browser.new_page("https://example.com").await?;
//!     let title = page.js("document.title").await?;
//!     println!("{title}");
//!     browser.close().await
//! }
//! ```
19
20mod profile;
21
22pub use profile::{generate_config_json, random_profile, write_config_file, HwProfile, PROFILES};
23
24use chromiumoxide::browser::{Browser as CdpBrowser, BrowserConfig};
25use chromiumoxide::cdp::browser_protocol::input::{
26    DispatchKeyEventParams, DispatchKeyEventType, DispatchMouseEventParams,
27    DispatchMouseEventType, MouseButton,
28};
29use chromiumoxide::cdp::browser_protocol::network::Cookie;
30use chromiumoxide::cdp::browser_protocol::page::{CaptureScreenshotParams, NavigateParams};
31use chromiumoxide::Page as CdpPage;
32use futures_util::StreamExt;
33use std::path::PathBuf;
34use tokio::task::JoinHandle;
35
/// Crate-wide result alias.
///
/// The error side is a boxed `std::error::Error` trait object that is also
/// `Send + Sync`, so any such error (or a plain string message) converts into
/// it through `?` or `.into()`.
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
37
38// ── BrowserBuilder ──────────────────────────────────────────────
39
40pub struct BrowserBuilder {
41    headless: bool,
42    chrome_path: Option<String>,
43    profile_choice: ProfileChoice,
44    user_data_dir: Option<String>,
45    window_size: (u32, u32),
46    extra_args: Vec<String>,
47}
48
/// Where the config file handed to Chrome at launch comes from.
enum ProfileChoice {
    /// No config file is passed to Chrome.
    None,
    /// Pick an index and seed at build time and write a config file for them.
    Random,
    /// Write a config file for an explicitly chosen hardware profile.
    Indexed {
        /// Hardware profile index.
        index: usize,
        /// Seed passed along with the index when the config file is written.
        seed: u64,
    },
    /// Use an already existing config file at this path.
    ConfigFile(String),
}
55
56impl Default for BrowserBuilder {
57    fn default() -> Self {
58        Self {
59            headless: true,
60            chrome_path: None,
61            profile_choice: ProfileChoice::None,
62            user_data_dir: None,
63            window_size: (1920, 1080),
64            extra_args: Vec::new(),
65        }
66    }
67}
68
69impl BrowserBuilder {
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    /// Run with visible window.
75    pub fn headful(mut self) -> Self {
76        self.headless = false;
77        self
78    }
79
80    /// Run in headless mode (default).
81    pub fn headless(mut self) -> Self {
82        self.headless = true;
83        self
84    }
85
86    /// Path to chrome.exe. Falls back to `CLAWSER_CHROME_PATH` env.
87    pub fn chrome_path(mut self, path: impl Into<String>) -> Self {
88        self.chrome_path = Some(path.into());
89        self
90    }
91
92    /// Use hardware profile `index` (0..99) with deterministic `seed`.
93    pub fn profile(mut self, index: usize, seed: u64) -> Self {
94        self.profile_choice = ProfileChoice::Indexed { index, seed };
95        self
96    }
97
98    /// Use a random hardware profile.
99    pub fn random(mut self) -> Self {
100        self.profile_choice = ProfileChoice::Random;
101        self
102    }
103
104    /// Use a custom clawser config JSON file path.
105    pub fn config(mut self, path: impl Into<String>) -> Self {
106        self.profile_choice = ProfileChoice::ConfigFile(path.into());
107        self
108    }
109
110    /// Persistent user data dir for cookies/storage.
111    pub fn user_data_dir(mut self, path: impl Into<String>) -> Self {
112        self.user_data_dir = Some(path.into());
113        self
114    }
115
116    /// Window size (default 1920x1080).
117    pub fn window_size(mut self, w: u32, h: u32) -> Self {
118        self.window_size = (w, h);
119        self
120    }
121
122    /// Add a Chrome launch argument (e.g. `"disable-gpu"`).
123    pub fn arg(mut self, arg: impl Into<String>) -> Self {
124        self.extra_args.push(arg.into());
125        self
126    }
127
128    /// Launch the browser.
129    pub async fn build(self) -> Result<Browser> {
130        let chrome_path = self
131            .chrome_path
132            .or_else(|| std::env::var("CLAWSER_CHROME_PATH").ok())
133            .ok_or("set CLAWSER_CHROME_PATH env or call .chrome_path()")?;
134
135        let (config_path, profile_id) = match self.profile_choice {
136            ProfileChoice::Random => {
137                let (idx, seed) = profile::random_profile();
138                let p = profile::write_config_file(idx, seed)?;
139                (Some(p), Some(format!("clawser_{idx}_{seed}")))
140            }
141            ProfileChoice::Indexed { index, seed } => {
142                let p = profile::write_config_file(index, seed)?;
143                (Some(p), Some(format!("clawser_{index}_{seed}")))
144            }
145            ProfileChoice::ConfigFile(ref path) => (Some(PathBuf::from(path)), None),
146            ProfileChoice::None => (None, None),
147        };
148
149        let mut cb = BrowserConfig::builder()
150            .chrome_executable(&chrome_path)
151            .disable_default_args()
152            .no_sandbox()
153            .with_head()
154            .window_size(self.window_size.0, self.window_size.1)
155            .viewport(None);
156
157        if self.headless {
158            cb = cb.arg(("headless", "new"));
159        }
160
161        if let Some(ref cp) = config_path {
162            let p = cp.to_string_lossy().replace('/', "\\");
163            cb = cb.arg(("clawser-config", p.as_str()));
164        }
165
166        if let Some(ref udd) = self.user_data_dir {
167            cb = cb.user_data_dir(udd);
168        } else if let Some(ref id) = profile_id {
169            // Store profiles next to the chrome exe (project dir), not in %TEMP%.
170            let profiles_dir = std::path::Path::new(&chrome_path)
171                .parent()
172                .unwrap_or(std::path::Path::new("."))
173                .join("clawser_profiles")
174                .join(id);
175            cb = cb.user_data_dir(profiles_dir);
176        }
177
178        cb = cb
179            .arg(("disable-blink-features", "AutomationControlled"))
180            .arg(("remote-allow-origins", "*"))
181            .arg("no-first-run")
182            .arg("no-default-browser-check");
183
184        for a in &self.extra_args {
185            cb = cb.arg(a.as_str());
186        }
187
188        let config = cb.build().map_err(|e| format!("browser config: {e}"))?;
189        let (browser, mut handler) = CdpBrowser::launch(config).await?;
190
191        let handle = tokio::spawn(async move {
192            while let Some(event) = handler.next().await {
193                if event.is_err() {
194                    break;
195                }
196            }
197        });
198
199        Ok(Browser {
200            inner: browser,
201            _handler: handle,
202            _config_path: config_path,
203        })
204    }
205}
206
207// ── Browser ─────────────────────────────────────────────────────
208
209pub struct Browser {
210    inner: CdpBrowser,
211    _handler: JoinHandle<()>,
212    _config_path: Option<PathBuf>,
213}
214
215impl Browser {
216    pub fn builder() -> BrowserBuilder {
217        BrowserBuilder::new()
218    }
219
220    /// Connect to an already-running Chrome at `ws_url`.
221    pub async fn connect(ws_url: &str) -> Result<Self> {
222        let (browser, mut handler) = CdpBrowser::connect(ws_url).await?;
223        let handle = tokio::spawn(async move {
224            while let Some(e) = handler.next().await {
225                if e.is_err() {
226                    break;
227                }
228            }
229        });
230        Ok(Self {
231            inner: browser,
232            _handler: handle,
233            _config_path: None,
234        })
235    }
236
237    /// Open a new tab and navigate to URL.
238    pub async fn new_page(&self, url: &str) -> Result<Page> {
239        let page = self.inner.new_page(url).await?;
240        Ok(Page { inner: page })
241    }
242
243    /// Get all open pages.
244    pub async fn pages(&self) -> Result<Vec<Page>> {
245        Ok(self
246            .inner
247            .pages()
248            .await?
249            .into_iter()
250            .map(|p| Page { inner: p })
251            .collect())
252    }
253
254    /// Get all browser cookies.
255    pub async fn cookies(&self) -> Result<Vec<Cookie>> {
256        Ok(self.inner.get_cookies().await?)
257    }
258
259    /// Access underlying chromiumoxide Browser for raw CDP.
260    pub fn cdp(&self) -> &CdpBrowser {
261        &self.inner
262    }
263
264    /// Gracefully shut down the browser.
265    pub async fn close(mut self) -> Result<()> {
266        self.inner.close().await?;
267        let _ = self._handler.await;
268        Ok(())
269    }
270}
271
272// ── Page ────────────────────────────────────────────────────────
273
274pub struct Page {
275    inner: CdpPage,
276}
277
278impl Page {
279    /// Navigate to URL (returns immediately, does not wait for load).
280    pub async fn navigate(&self, url: &str) -> Result<()> {
281        self.inner.execute(NavigateParams::new(url)).await?;
282        Ok(())
283    }
284
285    /// Navigate to URL and wait for page load.
286    pub async fn goto(&self, url: &str) -> Result<()> {
287        self.inner.goto(url).await?;
288        Ok(())
289    }
290
291    /// Wait for next navigation/load event.
292    pub async fn wait_for_load(&self) -> Result<()> {
293        self.inner.wait_for_navigation().await?;
294        Ok(())
295    }
296
297    /// Run JS expression in page context, return result as String.
298    pub async fn js(&self, expr: &str) -> Result<String> {
299        let result = self.inner.evaluate(expr).await?;
300        match result.value() {
301            Some(serde_json::Value::String(s)) => Ok(s.clone()),
302            Some(serde_json::Value::Null) | None => Ok(String::new()),
303            Some(v) => Ok(v.to_string()),
304        }
305    }
306
307    /// Run JS and deserialize result into `T`.
308    pub async fn js_as<T: serde::de::DeserializeOwned>(&self, expr: &str) -> Result<T> {
309        let result = self.inner.evaluate(expr).await?;
310        Ok(result.into_value()?)
311    }
312
313    /// Inject script that runs on every new document before page JS.
314    pub async fn js_on_new_document(&self, script: &str) -> Result<()> {
315        self.inner.evaluate_on_new_document(script).await?;
316        Ok(())
317    }
318
319    /// Current page URL.
320    pub async fn url(&self) -> Result<String> {
321        Ok(self.inner.url().await?.unwrap_or_default())
322    }
323
324    /// Page title.
325    pub async fn title(&self) -> Result<String> {
326        Ok(self.inner.get_title().await?.unwrap_or_default())
327    }
328
329    /// Full page HTML.
330    pub async fn html(&self) -> Result<String> {
331        Ok(self.inner.content().await?)
332    }
333
334    /// Capture PNG screenshot.
335    pub async fn screenshot(&self) -> Result<Vec<u8>> {
336        Ok(self
337            .inner
338            .screenshot(CaptureScreenshotParams::default())
339            .await?)
340    }
341
342    /// Click at (x, y) with realistic mouse movement + timing.
343    pub async fn click(&self, x: f64, y: f64) -> Result<()> {
344        self.inner
345            .execute(DispatchMouseEventParams::new(
346                DispatchMouseEventType::MouseMoved,
347                x,
348                y,
349            ))
350            .await?;
351        tokio::time::sleep(jitter(20, 60)).await;
352
353        let mut press =
354            DispatchMouseEventParams::new(DispatchMouseEventType::MousePressed, x, y);
355        press.button = Some(MouseButton::Left);
356        press.click_count = Some(1);
357        self.inner.execute(press).await?;
358        tokio::time::sleep(jitter(40, 120)).await;
359
360        let mut release =
361            DispatchMouseEventParams::new(DispatchMouseEventType::MouseReleased, x, y);
362        release.button = Some(MouseButton::Left);
363        release.click_count = Some(1);
364        self.inner.execute(release).await?;
365        Ok(())
366    }
367
368    /// Type text into focused element with realistic keystroke timing.
369    pub async fn type_text(&self, text: &str) -> Result<()> {
370        for ch in text.chars() {
371            let s = ch.to_string();
372            let mut down = DispatchKeyEventParams::new(DispatchKeyEventType::KeyDown);
373            down.text = Some(s.clone());
374            down.key = Some(s.clone());
375            self.inner.execute(down).await?;
376
377            let mut up = DispatchKeyEventParams::new(DispatchKeyEventType::KeyUp);
378            up.key = Some(s);
379            self.inner.execute(up).await?;
380
381            tokio::time::sleep(jitter(30, 130)).await;
382        }
383        Ok(())
384    }
385
386    /// Scroll by `delta_y` pixels (positive = down).
387    pub async fn scroll(&self, delta_y: f64) -> Result<()> {
388        let mut ev =
389            DispatchMouseEventParams::new(DispatchMouseEventType::MouseWheel, 400.0, 300.0);
390        ev.delta_x = Some(0.0);
391        ev.delta_y = Some(delta_y);
392        self.inner.execute(ev).await?;
393        Ok(())
394    }
395
396    /// Sleep for `ms` milliseconds.
397    pub async fn wait(&self, ms: u64) {
398        tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
399    }
400
401    /// Close the page/tab.
402    pub async fn close(self) -> Result<()> {
403        self.inner.close().await?;
404        Ok(())
405    }
406
407    /// Access underlying chromiumoxide Page for raw CDP.
408    pub fn cdp(&self) -> &CdpPage {
409        &self.inner
410    }
411}
412
413// ── Helpers ─────────────────────────────────────────────────────
414
/// Sub-second nanosecond component of the current wall-clock time.
///
/// A cheap, dependency-free source of variation for [`jitter`]; not suitable
/// for anything security-sensitive. Returns 0 instead of panicking when the
/// system clock reports a time before the Unix epoch.
fn nanos() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |since_epoch| u64::from(since_epoch.subsec_nanos()))
}

/// Pick a pseudo-random delay in `[min_ms, max_ms)` milliseconds.
///
/// An empty or inverted range (`max_ms <= min_ms`) yields exactly `min_ms`
/// rather than panicking on a zero modulus or an underflowing subtraction.
fn jitter(min_ms: u64, max_ms: u64) -> std::time::Duration {
    let span = max_ms.saturating_sub(min_ms);
    if span == 0 {
        return std::time::Duration::from_millis(min_ms);
    }

    // Scramble the clock reading (SplitMix64 finalizer) before reducing it.
    // If the clock ticks in coarse steps, e.g. 100 ns, the raw `nanos % span`
    // can reach only a few values (just two for a 40 ms span); mixing first
    // spreads the result over the whole range.
    let mut z = nanos().wrapping_add(0x9E37_79B9_7F4A_7C15);
    z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    z ^= z >> 31;

    // `z % span < span`, so the sum stays below `max_ms` and cannot overflow.
    std::time::Duration::from_millis(min_ms + z % span)
}