Skip to main content

clawser_browser/
lib.rs

1//! # just-fetch
2//!
3//! Async antidetect browser powered by a patched Chromium + CDP.
4//! Native tokio support — all methods are `async`.
5//!
6//! ```no_run
7//! use just_fetch::Browser;
8//!
9//! #[tokio::main]
10//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
11//!     let browser = Browser::builder()
12//!         .headful()
13//!         .random()
14//!         .build().await?;
15//!     let page = browser.navigate("https://example.com").await?;
16//!     page.human_idle(2000).await?;
17//!     let title = page.js("document.title").await?;
18//!     println!("{}", title);
19//!     browser.shutdown().await?;
20//!     Ok(())
21//! }
22//! ```
23
24mod process;
25pub mod profiles;
26mod protocol;
27pub mod types;
28
29pub use profiles::HwProfile;
30pub use types::{Cookie, Response};
31
32use std::io;
33use std::time::Duration;
34
35use tokio::process::Child;
36use tokio::sync::Mutex;
37
/// Builder for creating a browser instance.
///
/// Obtained from `Browser::builder()`; each setter consumes and returns the
/// builder so calls can be chained, and `build()` launches the browser.
#[derive(Debug, Clone)]
#[must_use = "a builder does nothing until `build()` is called"]
pub struct BrowserBuilder {
    /// `true` to run without a visible window.
    headless: bool,
    /// Path to a user-supplied config JSON; only consulted when no
    /// profile/seed pair is set.
    config_path: Option<String>,
    /// Index of the built-in device profile to use, if any.
    profile_index: Option<usize>,
    /// Seed paired with `profile_index` when generating a config.
    seed_index: Option<u64>,
}
45
/// A browser instance = 1 chrome.exe process + CDP connection.
///
/// Both the process handle and the socket sit behind async mutexes so the
/// `&self` methods can use them mutably.
pub struct Browser {
    /// Handle to the spawned browser process; awaited on shutdown and killed
    /// on drop.
    child: Mutex<Child>,
    // The port is stored at construction but not read by any code visible in
    // this file, hence the lint suppression.
    #[allow(dead_code)]
    cdp_port: u16,
    /// CDP WebSocket; each command locks it for the duration of the call.
    ws: Mutex<protocol::CdpSocket>,
}
53
/// A loaded page within the browser.
///
/// Holds a shared borrow of the [`Browser`] it came from, so it cannot
/// outlive that browser; all page commands go through the browser's socket.
pub struct Page<'b> {
    /// Browser whose CDP socket this page sends its commands through.
    browser: &'b Browser,
    /// Identifier slot for the page; not read by any method visible in this
    /// file (hence the leading underscore).
    _target_id: String,
}
59
60// --- BrowserBuilder ---
61
62impl BrowserBuilder {
63    /// Run with a visible browser window (default is headless).
64    pub fn headful(mut self) -> Self {
65        self.headless = false;
66        self
67    }
68
69    /// Run headless (no window). This is the default.
70    pub fn headless(mut self) -> Self {
71        self.headless = true;
72        self
73    }
74
75    /// Path to clawser config JSON (antidetect profile).
76    pub fn config(mut self, path: &str) -> Self {
77        self.config_path = Some(path.to_string());
78        self
79    }
80
81    /// Use a random profile from the 100 built-in device profiles.
82    pub fn random(mut self) -> Self {
83        self.profile_index = Some(profiles::random_profile_index());
84        self.seed_index = Some(profiles::random_seed_index());
85        self
86    }
87
88    /// Use a specific profile (0..99) with a specific seed.
89    /// Same (profile, seed) always produces the same fingerprint.
90    pub fn profile(mut self, profile_index: usize, seed_index: u64) -> Self {
91        self.profile_index = Some(profile_index);
92        self.seed_index = Some(seed_index);
93        self
94    }
95
96    /// Spawn chrome.exe and connect via CDP.
97    pub async fn build(self) -> io::Result<Browser> {
98        // If profile specified, generate config to temp file
99        let generated_path;
100        let config_path = if let (Some(pi), Some(si)) = (self.profile_index, self.seed_index) {
101            generated_path = profiles::write_config_file(pi, si)?;
102            generated_path.as_str()
103        } else {
104            generated_path = String::new();
105            self.config_path.as_deref().unwrap_or("")
106        };
107
108        let profile_id = match (self.profile_index, self.seed_index) {
109            (Some(pi), Some(si)) => Some(format!("p{}-s{}", pi, si)),
110            _ => None,
111        };
112
113        let cdp_port = process::pick_free_port()?;
114        let child = process::spawn_chrome(
115            self.headless,
116            cdp_port,
117            config_path,
118            profile_id.as_deref(),
119        ).await?;
120
121        // Wait for CDP to be ready
122        process::wait_for_cdp(cdp_port, Duration::from_secs(30)).await?;
123
124        // Connect to the first page's WebSocket
125        let ws_url = process::get_page_ws_url(cdp_port).await?;
126        let ws = protocol::connect_cdp(&ws_url).await?;
127
128        Ok(Browser {
129            child: Mutex::new(child),
130            cdp_port,
131            ws: Mutex::new(ws),
132        })
133    }
134}
135
136// --- Browser ---
137
138impl Browser {
139    /// Create a headless browser with default config.
140    pub async fn new() -> io::Result<Browser> {
141        Self::builder().build().await
142    }
143
144    /// Create a builder for fine-grained control.
145    pub fn builder() -> BrowserBuilder {
146        BrowserBuilder {
147            headless: true,
148            config_path: None,
149            profile_index: None,
150            seed_index: None,
151        }
152    }
153
154    /// Navigate the current page to a URL.
155    pub async fn navigate(&self, url: &str) -> io::Result<Page<'_>> {
156        {
157            let mut ws = self.ws.lock().await;
158            protocol::call_cdp(&mut ws, "Page.enable", serde_json::json!({})).await?;
159            let resp = protocol::call_cdp(
160                &mut ws,
161                "Page.navigate",
162                serde_json::json!({"url": url}),
163            )
164            .await?;
165
166            let _target_id = resp
167                .get("result")
168                .and_then(|r| r.get("frameId"))
169                .and_then(|v| v.as_str())
170                .unwrap_or("main")
171                .to_string();
172        }
173
174        // Wait for load event
175        self.wait_for_load(Duration::from_secs(30)).await?;
176
177        Ok(Page {
178            browser: self,
179            _target_id: String::new(),
180        })
181    }
182
183    /// Wait for Page.loadEventFired.
184    async fn wait_for_load(&self, timeout: Duration) -> io::Result<()> {
185        use futures_util::StreamExt;
186        let start = std::time::Instant::now();
187        let mut ws = self.ws.lock().await;
188
189        loop {
190            if start.elapsed() > timeout {
191                return Ok(()); // Timeout not fatal
192            }
193
194            let read_result = tokio::time::timeout(
195                Duration::from_secs(1),
196                ws.next(),
197            )
198            .await;
199
200            match read_result {
201                Ok(Some(Ok(tokio_tungstenite::tungstenite::Message::Text(text)))) => {
202                    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&text) {
203                        let method = parsed.get("method").and_then(|v| v.as_str()).unwrap_or("");
204                        if method == "Page.loadEventFired"
205                            || method == "Page.frameStoppedLoading"
206                        {
207                            return Ok(());
208                        }
209                    }
210                }
211                Err(_) => continue,   // timeout — keep waiting
212                Ok(None) => return Ok(()), // stream ended
213                _ => {}
214            }
215        }
216    }
217
218    /// Get cookies for a URL (or all if empty).
219    pub async fn cookies(&self, url: &str) -> io::Result<Vec<Cookie>> {
220        let mut ws = self.ws.lock().await;
221        let params = if url.is_empty() {
222            serde_json::json!({})
223        } else {
224            serde_json::json!({"urls": [url]})
225        };
226        let resp = protocol::call_cdp(&mut ws, "Network.getCookies", params).await?;
227        let cookies: Vec<Cookie> = resp
228            .get("result")
229            .and_then(|r| r.get("cookies"))
230            .cloned()
231            .map(|v| serde_json::from_value(v).unwrap_or_default())
232            .unwrap_or_default();
233        Ok(cookies)
234    }
235
236    /// Take a screenshot (PNG bytes).
237    pub async fn screenshot(&self) -> io::Result<Vec<u8>> {
238        let mut ws = self.ws.lock().await;
239        let resp = protocol::call_cdp(
240            &mut ws,
241            "Page.captureScreenshot",
242            serde_json::json!({"format": "png"}),
243        )
244        .await?;
245        let data = resp
246            .get("result")
247            .and_then(|r| r.get("data"))
248            .and_then(|v| v.as_str())
249            .unwrap_or("");
250        base64_decode(data)
251    }
252
253    /// Shut down the browser process cleanly.
254    pub async fn shutdown(self) -> io::Result<()> {
255        {
256            let mut ws = self.ws.lock().await;
257            let _ = protocol::call_cdp(&mut ws, "Browser.close", serde_json::json!({})).await;
258        }
259        let mut child = self.child.lock().await;
260        let _ = child.wait().await;
261        Ok(())
262    }
263}
264
265impl Drop for Browser {
266    fn drop(&mut self) {
267        if let Ok(mut child) = self.child.try_lock() {
268            let _ = child.start_kill();
269        }
270    }
271}
272
273// --- Page ---
274
275impl<'b> Page<'b> {
276    /// Move mouse along a human-like bezier curve to (x, y).
277    pub async fn mouse_move(&self, x: f64, y: f64, steps: u32) -> io::Result<()> {
278        let mut ws = self.browser.ws.lock().await;
279
280        let mut rng_buf = [0u8; 16];
281        profiles::getrandom(&mut rng_buf);
282        let r0 = u64::from_le_bytes(rng_buf[0..8].try_into().unwrap());
283        let r1 = u64::from_le_bytes(rng_buf[8..16].try_into().unwrap());
284
285        let start_x = (r0 % 400) as f64 + 100.0;
286        let start_y = (r1 % 300) as f64 + 100.0;
287
288        let cp1x = start_x + (x - start_x) * 0.3 + ((r0 >> 16) % 80) as f64 - 40.0;
289        let cp1y = start_y + (y - start_y) * 0.1 + ((r0 >> 24) % 60) as f64 - 30.0;
290        let cp2x = start_x + (x - start_x) * 0.7 + ((r1 >> 16) % 60) as f64 - 30.0;
291        let cp2y = start_y + (y - start_y) * 0.9 + ((r1 >> 24) % 40) as f64 - 20.0;
292
293        let steps = steps.max(5);
294        for i in 0..=steps {
295            let t = i as f64 / steps as f64;
296            let u = 1.0 - t;
297            let px = u * u * u * start_x + 3.0 * u * u * t * cp1x + 3.0 * u * t * t * cp2x + t * t * t * x;
298            let py = u * u * u * start_y + 3.0 * u * u * t * cp1y + 3.0 * u * t * t * cp2y + t * t * t * y;
299
300            protocol::call_cdp(&mut ws, "Input.dispatchMouseEvent", serde_json::json!({
301                "type": "mouseMoved", "x": px.round(), "y": py.round(),
302            }))
303            .await?;
304
305            let base_ms = 5 + ((r0.wrapping_add(i as u64 * 7)) % 12);
306            tokio::time::sleep(Duration::from_millis(base_ms)).await;
307        }
308        Ok(())
309    }
310
311    /// Click at (x, y) with human-like mouse movement first.
312    pub async fn click(&self, x: f64, y: f64) -> io::Result<()> {
313        self.mouse_move(x, y, 15).await?;
314
315        let mut ws = self.browser.ws.lock().await;
316        protocol::call_cdp(&mut ws, "Input.dispatchMouseEvent", serde_json::json!({
317            "type": "mousePressed", "x": x, "y": y, "button": "left", "clickCount": 1,
318        }))
319        .await?;
320
321        let mut buf = [0u8; 8];
322        profiles::getrandom(&mut buf);
323        let hold_ms = 50 + (u64::from_le_bytes(buf) % 70);
324        tokio::time::sleep(Duration::from_millis(hold_ms)).await;
325
326        protocol::call_cdp(&mut ws, "Input.dispatchMouseEvent", serde_json::json!({
327            "type": "mouseReleased", "x": x, "y": y, "button": "left", "clickCount": 1,
328        }))
329        .await?;
330        Ok(())
331    }
332
333    /// Type text with human-like timing.
334    pub async fn type_text(&self, text: &str) -> io::Result<()> {
335        let mut ws = self.browser.ws.lock().await;
336        let mut rng_buf = [0u8; 8];
337
338        for ch in text.chars() {
339            let key_str = ch.to_string();
340
341            protocol::call_cdp(&mut ws, "Input.dispatchKeyEvent", serde_json::json!({
342                "type": "keyDown", "text": key_str, "key": key_str,
343            }))
344            .await?;
345
346            profiles::getrandom(&mut rng_buf);
347            let dwell = 30 + (u64::from_le_bytes(rng_buf) % 50);
348            tokio::time::sleep(Duration::from_millis(dwell)).await;
349
350            protocol::call_cdp(&mut ws, "Input.dispatchKeyEvent", serde_json::json!({
351                "type": "keyUp", "key": key_str,
352            }))
353            .await?;
354
355            profiles::getrandom(&mut rng_buf);
356            let gap = 40 + (u64::from_le_bytes(rng_buf) % 140);
357            tokio::time::sleep(Duration::from_millis(gap)).await;
358        }
359        Ok(())
360    }
361
362    /// Scroll with human-like momentum.
363    pub async fn scroll(&self, delta_y: i32) -> io::Result<()> {
364        let mut ws = self.browser.ws.lock().await;
365        let mut rng_buf = [0u8; 8];
366        profiles::getrandom(&mut rng_buf);
367
368        let steps = 5 + (u64::from_le_bytes(rng_buf) % 6) as i32;
369        let base_delta = delta_y / steps;
370        let mut remaining = delta_y;
371
372        let mx = 400.0 + (u64::from_le_bytes(rng_buf) % 300) as f64;
373        let my = 300.0 + ((u64::from_le_bytes(rng_buf) >> 16) % 200) as f64;
374
375        for i in 0..steps {
376            let this_delta = if i == steps - 1 {
377                remaining
378            } else {
379                profiles::getrandom(&mut rng_buf);
380                let jitter = (u64::from_le_bytes(rng_buf) % 20) as i32 - 10;
381                let d = base_delta + jitter;
382                remaining -= d;
383                d
384            };
385
386            protocol::call_cdp(&mut ws, "Input.dispatchMouseEvent", serde_json::json!({
387                "type": "mouseWheel", "x": mx, "y": my, "deltaX": 0, "deltaY": this_delta,
388            }))
389            .await?;
390
391            let delay = 30 + (20 / (i + 1)) as u64;
392            tokio::time::sleep(Duration::from_millis(delay)).await;
393        }
394        Ok(())
395    }
396
397    /// Simulate idle human presence with random mouse movements.
398    pub async fn human_idle(&self, duration_ms: u64) -> io::Result<()> {
399        let start = std::time::Instant::now();
400        let mut rng_buf = [0u8; 16];
401
402        while (start.elapsed().as_millis() as u64) < duration_ms {
403            profiles::getrandom(&mut rng_buf);
404            let x = 200.0 + (u64::from_le_bytes(rng_buf[0..8].try_into().unwrap()) % 800) as f64;
405            let y = 150.0 + (u64::from_le_bytes(rng_buf[8..16].try_into().unwrap()) % 500) as f64;
406
407            self.mouse_move(x, y, 8).await?;
408
409            profiles::getrandom(&mut rng_buf);
410            let pause = 500 + (u64::from_le_bytes(rng_buf[0..8].try_into().unwrap()) % 1500);
411            let remaining = duration_ms.saturating_sub(start.elapsed().as_millis() as u64);
412            tokio::time::sleep(Duration::from_millis(pause.min(remaining))).await;
413        }
414        Ok(())
415    }
416
417    /// Capture full page as MHTML (HTML + JS + CSS + images — everything).
418    /// Save to .mhtml file, opens in Chrome with full fidelity.
419    pub async fn capture_mhtml(&self) -> io::Result<Vec<u8>> {
420        let mut ws = self.browser.ws.lock().await;
421        let resp = protocol::call_cdp(
422            &mut ws,
423            "Page.captureSnapshot",
424            serde_json::json!({"format": "mhtml"}),
425        )
426        .await?;
427        let data = resp
428            .get("result")
429            .and_then(|r| r.get("data"))
430            .and_then(|v| v.as_str())
431            .unwrap_or("");
432        Ok(data.as_bytes().to_vec())
433    }
434
435    /// Capture page HTML via DOM API (won't break JS/special chars).
436    /// Returns outerHTML including all `<script>` tags intact.
437    pub async fn capture_html(&self) -> io::Result<String> {
438        let mut ws = self.browser.ws.lock().await;
439        let doc = protocol::call_cdp(
440            &mut ws,
441            "DOM.getDocument",
442            serde_json::json!({"depth": 0}),
443        )
444        .await?;
445        let node_id = doc
446            .get("result")
447            .and_then(|r| r.get("root"))
448            .and_then(|r| r.get("nodeId"))
449            .and_then(|v| v.as_i64())
450            .ok_or_else(|| io::Error::other("DOM.getDocument: no root nodeId"))?;
451
452        let resp = protocol::call_cdp(
453            &mut ws,
454            "DOM.getOuterHTML",
455            serde_json::json!({"nodeId": node_id}),
456        )
457        .await?;
458        Ok(resp
459            .get("result")
460            .and_then(|r| r.get("outerHTML"))
461            .and_then(|v| v.as_str())
462            .unwrap_or("")
463            .to_string())
464    }
465
466    /// Execute JavaScript and return the result as a string.
467    pub async fn js(&self, code: &str) -> io::Result<String> {
468        let mut ws = self.browser.ws.lock().await;
469        let resp = protocol::call_cdp(
470            &mut ws,
471            "Runtime.evaluate",
472            serde_json::json!({
473                "expression": code,
474                "returnByValue": true,
475            }),
476        )
477        .await?;
478
479        let result = resp.get("result").and_then(|r| r.get("result"));
480
481        if let Some(result) = result {
482            if let Some(exception) = resp.get("result").and_then(|r| r.get("exceptionDetails")) {
483                let msg = exception
484                    .get("text")
485                    .and_then(|v| v.as_str())
486                    .unwrap_or("JS exception");
487                return Err(io::Error::other(format!("JS error: {}", msg)));
488            }
489
490            match result.get("value") {
491                Some(serde_json::Value::String(s)) => Ok(s.clone()),
492                Some(v) => Ok(v.to_string()),
493                None => {
494                    let type_str = result.get("type").and_then(|v| v.as_str()).unwrap_or("");
495                    if type_str == "undefined" {
496                        Ok("undefined".to_string())
497                    } else {
498                        Ok(result
499                            .get("description")
500                            .and_then(|v| v.as_str())
501                            .unwrap_or("")
502                            .to_string())
503                    }
504                }
505            }
506        } else {
507            Ok(String::new())
508        }
509    }
510}
511
/// Minimal base64 decoder for the standard alphabet (`A-Z a-z 0-9 + /`).
///
/// ASCII whitespace is skipped and trailing `=` padding is optional.
///
/// # Errors
///
/// Returns an error when the input contains a character outside the
/// alphabet, has data after `=` padding, or ends with a single dangling
/// symbol (six bits cannot form a byte).
fn base64_decode(input: &str) -> io::Result<Vec<u8>> {
    const TABLE: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const INVALID: u8 = 255;

    let mut lookup = [INVALID; 256];
    for (i, &c) in TABLE.iter().enumerate() {
        lookup[c as usize] = i as u8;
    }

    let mut out = Vec::with_capacity(input.len() * 3 / 4);
    let mut buf: u32 = 0;
    let mut bits: u32 = 0;
    let mut symbols: usize = 0;
    let mut seen_padding = false;

    // Iterate characters, not bytes, so an invalid multi-byte character is
    // reported as itself rather than as its first UTF-8 byte.
    for ch in input.chars() {
        if ch.is_ascii_whitespace() {
            continue;
        }
        if ch == '=' {
            seen_padding = true;
            continue;
        }

        let val = if ch.is_ascii() { lookup[ch as usize] } else { INVALID };
        if val == INVALID {
            return Err(io::Error::other(format!("invalid base64 char: {}", ch)));
        }
        if seen_padding {
            return Err(io::Error::other("invalid base64: data after padding"));
        }

        symbols += 1;
        buf = (buf << 6) | u32::from(val);
        bits += 6;
        if bits >= 8 {
            bits -= 8;
            out.push((buf >> bits) as u8);
            // Keep only the bits not yet emitted.
            buf &= (1 << bits) - 1;
        }
    }

    if symbols % 4 == 1 {
        return Err(io::Error::other("invalid base64: truncated input"));
    }
    Ok(out)
}