1mod client;
21mod profile;
22
23pub use client::{HttpClient, HttpClientBuilder};
24pub use profile::{generate_config_json, random_profile, write_config_file, HwProfile, PROFILES};
25
26pub use wreq;
28pub use wreq::header;
29
30use chromiumoxide::browser::{Browser as CdpBrowser, BrowserConfig};
31use chromiumoxide::cdp::browser_protocol::input::{
32 DispatchKeyEventParams, DispatchKeyEventType, DispatchMouseEventParams,
33 DispatchMouseEventType, MouseButton,
34};
35use chromiumoxide::cdp::browser_protocol::network::Cookie;
36use chromiumoxide::cdp::browser_protocol::page::{CaptureScreenshotParams, NavigateParams};
37use chromiumoxide::Page as CdpPage;
38use futures_util::StreamExt;
39use std::path::PathBuf;
40use std::time::Duration;
41use tokio::task::JoinHandle;
42
/// Crate-wide result alias: the error side is a boxed `std::error::Error` trait object that is
/// also `Send + Sync`.
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
44
/// Collects launch options for a [`Browser`]; consumed by [`BrowserBuilder::build`].
pub struct BrowserBuilder {
    /// When `true`, `build` adds the `("headless", "new")` launch argument.
    headless: bool,
    /// Explicit path to the Chrome executable. When `None`, `build` falls back to the
    /// `CLAWSER_CHROME_PATH` environment variable.
    chrome_path: Option<String>,
    /// Where the `clawser-config` file (if any) comes from.
    profile_choice: ProfileChoice,
    /// Explicit user data directory. When set, it takes precedence over the per-profile
    /// directory that `build` would otherwise derive.
    user_data_dir: Option<String>,
    /// `(width, height)` handed to the browser config's `window_size`.
    window_size: (u32, u32),
    /// Value passed as the `proxy-server` launch argument, if any.
    proxy: Option<String>,
    /// Additional launch arguments, appended after all built-in ones.
    extra_args: Vec<String>,
}
56
/// Source of the configuration file that `BrowserBuilder::build` hands to the browser.
enum ProfileChoice {
    /// No config file is passed and no per-profile user data directory is derived.
    None,
    /// `build` asks `profile::random_profile()` for an `(index, seed)` pair and writes a config
    /// file for it.
    Random,
    /// `build` writes a config file for this explicit pair.
    /// NOTE(review): `index` presumably selects an entry of `PROFILES` — confirm in `profile`.
    Indexed { index: usize, seed: u64 },
    /// Path of an already existing config file; used as given, nothing is written.
    ConfigFile(String),
}
63
64impl Default for BrowserBuilder {
65 fn default() -> Self {
66 Self {
67 headless: true,
68 chrome_path: None,
69 profile_choice: ProfileChoice::None,
70 user_data_dir: None,
71 window_size: (1920, 1080),
72 proxy: None,
73 extra_args: Vec::new(),
74 }
75 }
76}
77
78impl BrowserBuilder {
79 pub fn new() -> Self {
80 Self::default()
81 }
82
83 pub fn headful(mut self) -> Self {
85 self.headless = false;
86 self
87 }
88
89 pub fn headless(mut self) -> Self {
91 self.headless = true;
92 self
93 }
94
95 pub fn chrome_path(mut self, path: impl Into<String>) -> Self {
97 self.chrome_path = Some(path.into());
98 self
99 }
100
101 pub fn profile(mut self, index: usize, seed: u64) -> Self {
103 self.profile_choice = ProfileChoice::Indexed { index, seed };
104 self
105 }
106
107 pub fn random(mut self) -> Self {
109 self.profile_choice = ProfileChoice::Random;
110 self
111 }
112
113 pub fn config(mut self, path: impl Into<String>) -> Self {
115 self.profile_choice = ProfileChoice::ConfigFile(path.into());
116 self
117 }
118
119 pub fn user_data_dir(mut self, path: impl Into<String>) -> Self {
121 self.user_data_dir = Some(path.into());
122 self
123 }
124
125 pub fn window_size(mut self, w: u32, h: u32) -> Self {
127 self.window_size = (w, h);
128 self
129 }
130
131 pub fn proxy(mut self, proxy: impl Into<String>) -> Self {
134 self.proxy = Some(proxy.into());
135 self
136 }
137
138 pub fn proxy_socks5(mut self, host: &str, port: u16, user: &str, pass: &str) -> Self {
140 self.proxy = Some(format!("socks5://{}:{}@{}:{}", user, pass, host, port));
141 self
142 }
143
144 pub fn arg(mut self, arg: impl Into<String>) -> Self {
146 self.extra_args.push(arg.into());
147 self
148 }
149
150 pub async fn build(self) -> Result<Browser> {
152 let chrome_path = self
153 .chrome_path
154 .or_else(|| std::env::var("CLAWSER_CHROME_PATH").ok())
155 .ok_or("set CLAWSER_CHROME_PATH env or call .chrome_path()")?;
156
157 let (config_path, profile_id) = match self.profile_choice {
158 ProfileChoice::Random => {
159 let (idx, seed) = profile::random_profile();
160 let p = profile::write_config_file(idx, seed)?;
161 (Some(p), Some(format!("clawser_{idx}_{seed}")))
162 }
163 ProfileChoice::Indexed { index, seed } => {
164 let p = profile::write_config_file(index, seed)?;
165 (Some(p), Some(format!("clawser_{index}_{seed}")))
166 }
167 ProfileChoice::ConfigFile(ref path) => (Some(PathBuf::from(path)), None),
168 ProfileChoice::None => (None, None),
169 };
170
171 let mut cb = BrowserConfig::builder()
172 .chrome_executable(&chrome_path)
173 .disable_default_args()
174 .no_sandbox()
175 .with_head()
176 .window_size(self.window_size.0, self.window_size.1)
177 .viewport(None);
178
179 if self.headless {
180 cb = cb.arg(("headless", "new"));
181 }
182
183 if let Some(ref cp) = config_path {
184 let p = cp.to_string_lossy().replace('/', "\\");
185 cb = cb.arg(("clawser-config", p.as_str()));
186 }
187
188 if let Some(ref udd) = self.user_data_dir {
189 cb = cb.user_data_dir(udd);
190 } else if let Some(ref id) = profile_id {
191 let profiles_dir = std::path::Path::new(&chrome_path)
193 .parent()
194 .unwrap_or(std::path::Path::new("."))
195 .join("clawser_profiles")
196 .join(id);
197 cb = cb.user_data_dir(profiles_dir);
198 }
199
200 if let Some(ref proxy) = self.proxy {
201 cb = cb.arg(("proxy-server", proxy.as_str()));
202 }
203
204 cb = cb
205 .arg(("disable-blink-features", "AutomationControlled"))
206 .arg(("remote-allow-origins", "*"))
207 .arg("no-first-run")
208 .arg("no-default-browser-check");
209
210 for a in &self.extra_args {
211 cb = cb.arg(a.as_str());
212 }
213
214 let config = cb.build().map_err(|e| format!("browser config: {e}"))?;
215 let (browser, mut handler) = CdpBrowser::launch(config).await?;
216
217 let handle = tokio::spawn(async move {
218 while let Some(event) = handler.next().await {
219 if event.is_err() {
220 break;
221 }
222 }
223 });
224
225 Ok(Browser {
226 inner: browser,
227 _handler: handle,
228 _config_path: config_path,
229 })
230 }
231}
232
/// A launched or connected browser together with the background task driving its handler.
pub struct Browser {
    /// Underlying `chromiumoxide` browser handle.
    inner: CdpBrowser,
    /// Task that polls the CDP handler stream; awaited in [`Browser::close`].
    _handler: JoinHandle<()>,
    /// Config file path chosen at launch (`None` when created through `connect`).
    /// Not read by any code visible in this file.
    _config_path: Option<PathBuf>,
}
240
241impl Browser {
242 pub fn builder() -> BrowserBuilder {
243 BrowserBuilder::new()
244 }
245
246 pub async fn connect(ws_url: &str) -> Result<Self> {
248 let (browser, mut handler) = CdpBrowser::connect(ws_url).await?;
249 let handle = tokio::spawn(async move {
250 while let Some(e) = handler.next().await {
251 if e.is_err() {
252 break;
253 }
254 }
255 });
256 Ok(Self {
257 inner: browser,
258 _handler: handle,
259 _config_path: None,
260 })
261 }
262
263 pub async fn new_page(&self, url: &str) -> Result<Page> {
266 let page = self.inner.new_page(url).await?;
267 let (tx, rx) = tokio::sync::watch::channel(false);
268 let sim_page = page.clone();
269 tokio::spawn(async move {
270 human_loop(sim_page, rx, 1920.0, 1080.0).await;
271 });
272 Ok(Page { inner: page, _sim_cancel: tx })
273 }
274
275 pub async fn pages(&self) -> Result<Vec<Page>> {
277 let mut result = Vec::new();
278 for p in self.inner.pages().await? {
279 let (tx, rx) = tokio::sync::watch::channel(false);
280 let sim_page = p.clone();
281 tokio::spawn(async move {
282 human_loop(sim_page, rx, 1920.0, 1080.0).await;
283 });
284 result.push(Page { inner: p, _sim_cancel: tx });
285 }
286 Ok(result)
287 }
288
289 pub async fn cookies(&self) -> Result<Vec<Cookie>> {
291 Ok(self.inner.get_cookies().await?)
292 }
293
294 pub fn cdp(&self) -> &CdpBrowser {
296 &self.inner
297 }
298
299 pub async fn close(mut self) -> Result<()> {
301 self.inner.close().await?;
302 let _ = self._handler.await;
303 Ok(())
304 }
305}
306
/// A browser page paired with the cancel channel of its background input simulator.
pub struct Page {
    /// Underlying `chromiumoxide` page handle.
    inner: CdpPage,
    /// Sender half of the watch channel whose receiver is given to `human_loop`.
    /// No code visible in this file ever sends on it; it is only held (and later dropped)
    /// together with the page.
    _sim_cancel: tokio::sync::watch::Sender<bool>,
}
313
314impl Page {
315 pub async fn navigate(&self, url: &str) -> Result<()> {
317 self.inner.execute(NavigateParams::new(url)).await?;
318 Ok(())
319 }
320
321 pub async fn goto(&self, url: &str) -> Result<()> {
323 self.inner.goto(url).await?;
324 Ok(())
325 }
326
327 pub async fn wait_for_load(&self) -> Result<()> {
329 self.inner.wait_for_navigation().await?;
330 Ok(())
331 }
332
333 pub async fn js(&self, expr: &str) -> Result<String> {
335 let result = self.inner.evaluate(expr).await?;
336 match result.value() {
337 Some(serde_json::Value::String(s)) => Ok(s.clone()),
338 Some(serde_json::Value::Null) | None => Ok(String::new()),
339 Some(v) => Ok(v.to_string()),
340 }
341 }
342
343 pub async fn js_as<T: serde::de::DeserializeOwned>(&self, expr: &str) -> Result<T> {
345 let result = self.inner.evaluate(expr).await?;
346 Ok(result.into_value()?)
347 }
348
349 pub async fn js_on_new_document(&self, script: &str) -> Result<()> {
351 self.inner.evaluate_on_new_document(script).await?;
352 Ok(())
353 }
354
355 pub async fn url(&self) -> Result<String> {
357 Ok(self.inner.url().await?.unwrap_or_default())
358 }
359
360 pub async fn title(&self) -> Result<String> {
362 Ok(self.inner.get_title().await?.unwrap_or_default())
363 }
364
365 pub async fn html(&self) -> Result<String> {
367 Ok(self.inner.content().await?)
368 }
369
370 pub async fn screenshot(&self) -> Result<Vec<u8>> {
372 Ok(self
373 .inner
374 .screenshot(CaptureScreenshotParams::default())
375 .await?)
376 }
377
378 pub async fn click(&self, x: f64, y: f64) -> Result<()> {
380 self.inner
381 .execute(DispatchMouseEventParams::new(
382 DispatchMouseEventType::MouseMoved,
383 x,
384 y,
385 ))
386 .await?;
387 tokio::time::sleep(jitter(20, 60)).await;
388
389 let mut press =
390 DispatchMouseEventParams::new(DispatchMouseEventType::MousePressed, x, y);
391 press.button = Some(MouseButton::Left);
392 press.click_count = Some(1);
393 self.inner.execute(press).await?;
394 tokio::time::sleep(jitter(40, 120)).await;
395
396 let mut release =
397 DispatchMouseEventParams::new(DispatchMouseEventType::MouseReleased, x, y);
398 release.button = Some(MouseButton::Left);
399 release.click_count = Some(1);
400 self.inner.execute(release).await?;
401 Ok(())
402 }
403
404 pub async fn type_text(&self, text: &str) -> Result<()> {
406 for ch in text.chars() {
407 let s = ch.to_string();
408 let mut down = DispatchKeyEventParams::new(DispatchKeyEventType::KeyDown);
409 down.text = Some(s.clone());
410 down.key = Some(s.clone());
411 self.inner.execute(down).await?;
412
413 let mut up = DispatchKeyEventParams::new(DispatchKeyEventType::KeyUp);
414 up.key = Some(s);
415 self.inner.execute(up).await?;
416
417 tokio::time::sleep(jitter(30, 130)).await;
418 }
419 Ok(())
420 }
421
422 pub async fn scroll(&self, delta_y: f64) -> Result<()> {
424 let mut ev =
425 DispatchMouseEventParams::new(DispatchMouseEventType::MouseWheel, 400.0, 300.0);
426 ev.delta_x = Some(0.0);
427 ev.delta_y = Some(delta_y);
428 self.inner.execute(ev).await?;
429 Ok(())
430 }
431
432 pub async fn wait(&self, ms: u64) {
434 tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
435 }
436
437 pub async fn close(self) -> Result<()> {
439 self.inner.close().await?;
440 Ok(())
441 }
442
443 pub fn cdp(&self) -> &CdpPage {
445 &self.inner
446 }
447}
448
449async fn human_loop(
458 page: CdpPage,
459 mut cancel: tokio::sync::watch::Receiver<bool>,
460 vw: f64,
461 vh: f64,
462) {
463 let mut rng = FastRng::new(nanos() ^ 0xDEAD_BEEF);
464 let mut mouse_x: f64 = vw / 2.0;
465 let mut mouse_y: f64 = vh / 2.0;
466
467 loop {
468 if *cancel.borrow() {
469 break;
470 }
471
472 let roll = rng.next_range(100);
474 if roll < 60 {
475 let tx = rng.next_f64() * (vw - 40.0) + 20.0;
477 let ty = rng.next_f64() * (vh - 40.0) + 20.0;
478 if bezier_move(&page, &mut cancel, &mut rng, mouse_x, mouse_y, tx, ty)
479 .await
480 .is_err()
481 {
482 break;
483 }
484 mouse_x = tx;
485 mouse_y = ty;
486 } else if roll < 85 {
487 let direction = if rng.next_range(100) < 80 { 1.0 } else { -1.0 };
489 let amount = (rng.next_range(200) as f64 + 50.0) * direction;
490 let mut ev = DispatchMouseEventParams::new(
491 DispatchMouseEventType::MouseWheel,
492 mouse_x,
493 mouse_y,
494 );
495 ev.delta_x = Some(0.0);
496 ev.delta_y = Some(amount);
497 let _ = page.execute(ev).await;
498 } else {
499 }
501
502 let delay = rng.next_range(3200) as u64 + 800;
504 tokio::select! {
505 _ = tokio::time::sleep(Duration::from_millis(delay)) => {}
506 _ = cancel.changed() => break,
507 }
508 }
509}
510
511async fn bezier_move(
513 page: &CdpPage,
514 cancel: &mut tokio::sync::watch::Receiver<bool>,
515 rng: &mut FastRng,
516 sx: f64,
517 sy: f64,
518 tx: f64,
519 ty: f64,
520) -> std::result::Result<(), ()> {
521 let cx = (sx + tx) / 2.0 + (rng.next_f64() - 0.5) * 200.0;
523 let cy = (sy + ty) / 2.0 + (rng.next_f64() - 0.5) * 200.0;
524
525 let steps = rng.next_range(10) + 8; for i in 1..=steps {
527 if *cancel.borrow() {
528 return Err(());
529 }
530 let t = i as f64 / steps as f64;
531 let inv = 1.0 - t;
532 let x = inv * inv * sx + 2.0 * inv * t * cx + t * t * tx;
534 let y = inv * inv * sy + 2.0 * inv * t * cy + t * t * ty;
535
536 let ev =
537 DispatchMouseEventParams::new(DispatchMouseEventType::MouseMoved, x, y);
538 let _ = page.execute(ev).await;
539
540 let step_delay = rng.next_range(17) as u64 + 8;
542 tokio::select! {
543 _ = tokio::time::sleep(Duration::from_millis(step_delay)) => {}
544 _ = cancel.changed() => return Err(()),
545 }
546 }
547 Ok(())
548}
549
/// Minimal xorshift64 pseudo-random generator (shift triple 13 / 7 / 17).
///
/// Used only to vary simulated input timing and positions; not suitable for anything
/// security-sensitive.
struct FastRng {
    /// Current generator state; never zero after construction.
    state: u64,
}

impl FastRng {
    /// Creates a generator from `seed`.
    ///
    /// A zero seed is replaced by a fixed non-zero constant, because an all-zero state would
    /// stay zero forever under the xor/shift steps.
    fn new(seed: u64) -> Self {
        Self {
            state: if seed == 0 { 0x1234_5678_9ABC_DEF0 } else { seed },
        }
    }

    /// Advances the state by one xorshift round and returns the new state.
    fn next(&mut self) -> u64 {
        let mut s = self.state;
        s ^= s << 13;
        s ^= s >> 7;
        s ^= s << 17;
        self.state = s;
        s
    }

    /// Returns a value in `0..max`.
    ///
    /// `max == 0` describes an empty range; it yields `0` without advancing the generator
    /// instead of panicking on a remainder by zero.
    fn next_range(&mut self, max: u32) -> u32 {
        if max == 0 {
            return 0;
        }
        // The remainder is strictly below `max`, so it always fits in `u32`.
        (self.next() % u64::from(max)) as u32
    }

    /// Returns a value in `[0, 1)` built from the low 52 bits of the next output.
    fn next_f64(&mut self) -> f64 {
        (self.next() & 0x000F_FFFF_FFFF_FFFF) as f64 / (0x0010_0000_0000_0000u64 as f64)
    }
}
580
/// Returns the sub-second nanosecond component of the current system time (`0..1_000_000_000`).
///
/// Serves as a cheap entropy source for seeding and jitter. If the system clock is set before
/// the UNIX epoch, `0` is returned instead of panicking.
fn nanos() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| u64::from(elapsed.subsec_nanos()))
}
589
590fn jitter(min_ms: u64, max_ms: u64) -> std::time::Duration {
591 std::time::Duration::from_millis(min_ms + nanos() % (max_ms - min_ms))
592}