mod client;
mod profile;
pub use client::{HttpClient, HttpClientBuilder};
pub use profile::{generate_config_json, random_profile, write_config_file, HwProfile, PROFILES};
pub use wreq;
pub use wreq::header;
use chromiumoxide::browser::{Browser as CdpBrowser, BrowserConfig};
use chromiumoxide::cdp::browser_protocol::input::{
DispatchKeyEventParams, DispatchKeyEventType, DispatchMouseEventParams,
DispatchMouseEventType, MouseButton,
};
use chromiumoxide::cdp::browser_protocol::network::Cookie;
use chromiumoxide::cdp::browser_protocol::page::{CaptureScreenshotParams, NavigateParams};
use chromiumoxide::Page as CdpPage;
use futures_util::StreamExt;
use std::path::PathBuf;
use std::time::Duration;
use tokio::task::JoinHandle;
/// Crate-wide result alias: the error side is a boxed `std::error::Error`
/// trait object that is `Send + Sync`, so any error type can be propagated with `?`.
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;
/// Fluent configuration for launching a browser instance.
///
/// Start from [`BrowserBuilder::new`] (or `Default::default()`), chain the
/// setter methods, and finish with `build()`.
pub struct BrowserBuilder {
    /// Whether the headless switch is passed at launch.
    headless: bool,
    /// Explicit path of the browser executable, if one was given.
    chrome_path: Option<String>,
    /// Which hardware profile / config file (if any) to launch with.
    profile_choice: ProfileChoice,
    /// Explicit user-data directory, if one was given.
    user_data_dir: Option<String>,
    /// Window size as `(width, height)`.
    window_size: (u32, u32),
    /// Proxy server string, if one was given.
    proxy: Option<String>,
    /// Additional command-line arguments appended after the built-in ones.
    extra_args: Vec<String>,
}

/// Source of the profile configuration used at launch.
enum ProfileChoice {
    /// Launch without any profile configuration.
    None,
    /// Pick a profile at random when building.
    Random,
    /// Use the profile identified by `index` and `seed`.
    Indexed { index: usize, seed: u64 },
    /// Use an already existing config file at the given path.
    ConfigFile(String),
}

impl Default for BrowserBuilder {
    /// Headless, 1920x1080 window, and nothing else configured.
    fn default() -> Self {
        const DEFAULT_WINDOW: (u32, u32) = (1920, 1080);

        BrowserBuilder {
            extra_args: Vec::new(),
            proxy: None,
            window_size: DEFAULT_WINDOW,
            user_data_dir: None,
            profile_choice: ProfileChoice::None,
            chrome_path: None,
            headless: true,
        }
    }
}
impl BrowserBuilder {
    /// Creates a builder populated with the `Default` settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Launches with a visible window (the headless switch is not passed).
    pub fn headful(mut self) -> Self {
        self.headless = false;
        self
    }

    /// Launches headless (`--headless=new`).
    pub fn headless(mut self) -> Self {
        self.headless = true;
        self
    }

    /// Sets the browser executable path, taking precedence over the
    /// `CLAWSER_CHROME_PATH` environment variable.
    pub fn chrome_path(mut self, path: impl Into<String>) -> Self {
        self.chrome_path = Some(path.into());
        self
    }

    /// Selects the profile identified by `index` and `seed`.
    ///
    /// Replaces any earlier `random()` / `config()` choice.
    pub fn profile(mut self, index: usize, seed: u64) -> Self {
        self.profile_choice = ProfileChoice::Indexed { index, seed };
        self
    }

    /// Selects a randomly chosen profile at build time.
    ///
    /// Replaces any earlier `profile()` / `config()` choice.
    pub fn random(mut self) -> Self {
        self.profile_choice = ProfileChoice::Random;
        self
    }

    /// Uses an existing config file at `path` instead of generating one.
    ///
    /// Replaces any earlier `profile()` / `random()` choice.
    pub fn config(mut self, path: impl Into<String>) -> Self {
        self.profile_choice = ProfileChoice::ConfigFile(path.into());
        self
    }

    /// Sets an explicit user-data directory, overriding the per-profile
    /// directory that `build()` would otherwise derive.
    pub fn user_data_dir(mut self, path: impl Into<String>) -> Self {
        self.user_data_dir = Some(path.into());
        self
    }

    /// Sets the browser window size in pixels.
    pub fn window_size(mut self, w: u32, h: u32) -> Self {
        self.window_size = (w, h);
        self
    }

    /// Sets the value passed to the `proxy-server` switch.
    pub fn proxy(mut self, proxy: impl Into<String>) -> Self {
        self.proxy = Some(proxy.into());
        self
    }

    /// Sets a SOCKS5 proxy of the form `socks5://user:pass@host:port`.
    ///
    /// The credentials are inserted verbatim; they are not percent-encoded.
    pub fn proxy_socks5(mut self, host: &str, port: u16, user: &str, pass: &str) -> Self {
        self.proxy = Some(format!("socks5://{}:{}@{}:{}", user, pass, host, port));
        self
    }

    /// Appends one extra command-line argument; these are added after all
    /// built-in arguments, in call order.
    pub fn arg(mut self, arg: impl Into<String>) -> Self {
        self.extra_args.push(arg.into());
        self
    }

    /// Launches the browser and starts the background task that drives its
    /// CDP event stream.
    ///
    /// # Errors
    /// Returns an error when no executable path is available (neither
    /// `chrome_path()` nor `CLAWSER_CHROME_PATH`), when writing the profile
    /// config file fails, when the browser configuration is rejected, or when
    /// the browser cannot be launched.
    pub async fn build(self) -> Result<Browser> {
        let chrome_path = self
            .chrome_path
            .or_else(|| std::env::var("CLAWSER_CHROME_PATH").ok())
            .ok_or("set CLAWSER_CHROME_PATH env or call .chrome_path()")?;

        // Generated profiles get an id, which later names their user-data directory.
        let (config_path, profile_id) = match self.profile_choice {
            ProfileChoice::Random => {
                let (idx, seed) = profile::random_profile();
                let written = profile::write_config_file(idx, seed)?;
                (Some(written), Some(format!("clawser_{idx}_{seed}")))
            }
            ProfileChoice::Indexed { index, seed } => {
                let written = profile::write_config_file(index, seed)?;
                (Some(written), Some(format!("clawser_{index}_{seed}")))
            }
            ProfileChoice::ConfigFile(ref path) => (Some(PathBuf::from(path)), None),
            ProfileChoice::None => (None, None),
        };

        // Default args are disabled and `with_head()` is always set; headless
        // mode is requested explicitly through `--headless=new` below.
        let mut cb = BrowserConfig::builder()
            .chrome_executable(&chrome_path)
            .disable_default_args()
            .no_sandbox()
            .with_head()
            .window_size(self.window_size.0, self.window_size.1)
            .viewport(None);

        if self.headless {
            cb = cb.arg(("headless", "new"));
        }

        if let Some(ref cp) = config_path {
            let raw = cp.to_string_lossy();
            // Only Windows uses `\` as its path separator. Rewriting `/` on any
            // other platform would turn a valid path into a non-existent one.
            let config_arg = if cfg!(windows) {
                raw.replace('/', "\\")
            } else {
                raw.into_owned()
            };
            cb = cb.arg(("clawser-config", config_arg.as_str()));
        }

        if let Some(ref udd) = self.user_data_dir {
            cb = cb.user_data_dir(udd);
        } else if let Some(ref id) = profile_id {
            // Keep each generated profile's data next to the executable,
            // under `clawser_profiles/<id>`.
            let profiles_dir = std::path::Path::new(&chrome_path)
                .parent()
                .unwrap_or(std::path::Path::new("."))
                .join("clawser_profiles")
                .join(id);
            cb = cb.user_data_dir(profiles_dir);
        }

        if let Some(ref proxy) = self.proxy {
            cb = cb.arg(("proxy-server", proxy.as_str()));
        }

        cb = cb
            .arg(("disable-blink-features", "AutomationControlled"))
            .arg(("remote-allow-origins", "*"))
            .arg("no-first-run")
            .arg("no-default-browser-check");

        for extra in &self.extra_args {
            cb = cb.arg(extra.as_str());
        }

        let config = cb.build().map_err(|e| format!("browser config: {e}"))?;
        let (browser, mut handler) = CdpBrowser::launch(config).await?;

        // Poll the handler stream until it ends or yields its first error.
        let handle = tokio::spawn(async move {
            while let Some(event) = handler.next().await {
                if event.is_err() {
                    break;
                }
            }
        });

        Ok(Browser {
            inner: browser,
            _handler: handle,
            _config_path: config_path,
        })
    }
}
/// Handle to a launched or attached browser, wrapping a chromiumoxide browser.
pub struct Browser {
/// Underlying CDP browser handle (exposed through `cdp()`).
inner: CdpBrowser,
/// Background task that polls the CDP handler stream; awaited in `close()`.
_handler: JoinHandle<()>,
/// Config file path used at launch, if any; `None` for `connect()`.
/// Stored but not read anywhere in the visible code.
_config_path: Option<PathBuf>,
}
impl Browser {
pub fn builder() -> BrowserBuilder {
BrowserBuilder::new()
}
pub async fn connect(ws_url: &str) -> Result<Self> {
let (browser, mut handler) = CdpBrowser::connect(ws_url).await?;
let handle = tokio::spawn(async move {
while let Some(e) = handler.next().await {
if e.is_err() {
break;
}
}
});
Ok(Self {
inner: browser,
_handler: handle,
_config_path: None,
})
}
pub async fn new_page(&self, url: &str) -> Result<Page> {
let page = self.inner.new_page(url).await?;
let (tx, rx) = tokio::sync::watch::channel(false);
let sim_page = page.clone();
tokio::spawn(async move {
human_loop(sim_page, rx, 1920.0, 1080.0).await;
});
Ok(Page { inner: page, _sim_cancel: tx })
}
pub async fn pages(&self) -> Result<Vec<Page>> {
let mut result = Vec::new();
for p in self.inner.pages().await? {
let (tx, rx) = tokio::sync::watch::channel(false);
let sim_page = p.clone();
tokio::spawn(async move {
human_loop(sim_page, rx, 1920.0, 1080.0).await;
});
result.push(Page { inner: p, _sim_cancel: tx });
}
Ok(result)
}
pub async fn cookies(&self) -> Result<Vec<Cookie>> {
Ok(self.inner.get_cookies().await?)
}
pub fn cdp(&self) -> &CdpBrowser {
&self.inner
}
pub async fn close(mut self) -> Result<()> {
self.inner.close().await?;
let _ = self._handler.await;
Ok(())
}
}
/// A browser page (tab) paired with the cancel handle of its background
/// input simulation.
pub struct Page {
/// Underlying CDP page handle (exposed through `cdp()`).
inner: CdpPage,
/// Sender side of the simulation's cancel channel. Nothing is ever sent on it
/// in the visible code; dropping it is what makes the simulation loop exit.
_sim_cancel: tokio::sync::watch::Sender<bool>,
}
impl Page {
/// Starts a navigation to `url` by executing a `NavigateParams` command.
///
/// NOTE(review): presumably returns once the command is acknowledged rather
/// than when the page has loaded — confirm, and pair with `wait_for_load` if so.
///
/// # Errors
/// Propagates any error from executing the command.
pub async fn navigate(&self, url: &str) -> Result<()> {
self.inner.execute(NavigateParams::new(url)).await?;
Ok(())
}
/// Navigates to `url` using the underlying page's `goto`.
///
/// # Errors
/// Propagates any error from the underlying call.
pub async fn goto(&self, url: &str) -> Result<()> {
self.inner.goto(url).await?;
Ok(())
}
/// Waits for the underlying page's pending navigation to complete.
///
/// # Errors
/// Propagates any error from the underlying call.
pub async fn wait_for_load(&self) -> Result<()> {
self.inner.wait_for_navigation().await?;
Ok(())
}
/// Evaluates the JavaScript expression `expr` and returns the result as text.
///
/// A string result is returned as-is, `null` or a missing value yields an
/// empty string, and any other JSON value is returned in its JSON text form.
///
/// # Errors
/// Propagates any evaluation error.
pub async fn js(&self, expr: &str) -> Result<String> {
let result = self.inner.evaluate(expr).await?;
match result.value() {
Some(serde_json::Value::String(s)) => Ok(s.clone()),
Some(serde_json::Value::Null) | None => Ok(String::new()),
Some(v) => Ok(v.to_string()),
}
}
/// Evaluates the JavaScript expression `expr` and deserializes the result into `T`.
///
/// # Errors
/// Propagates evaluation errors and failures to convert the result into `T`.
pub async fn js_as<T: serde::de::DeserializeOwned>(&self, expr: &str) -> Result<T> {
let result = self.inner.evaluate(expr).await?;
Ok(result.into_value()?)
}
/// Registers `script` via the page's `evaluate_on_new_document`.
///
/// # Errors
/// Propagates any error from the underlying call.
pub async fn js_on_new_document(&self, script: &str) -> Result<()> {
self.inner.evaluate_on_new_document(script).await?;
Ok(())
}
/// Returns the page's current URL, or an empty string when none is reported.
pub async fn url(&self) -> Result<String> {
Ok(self.inner.url().await?.unwrap_or_default())
}
/// Returns the page title, or an empty string when none is reported.
pub async fn title(&self) -> Result<String> {
Ok(self.inner.get_title().await?.unwrap_or_default())
}
/// Returns the page content as reported by the underlying page's `content()`.
pub async fn html(&self) -> Result<String> {
Ok(self.inner.content().await?)
}
/// Captures a screenshot with default capture parameters and returns the raw bytes.
pub async fn screenshot(&self) -> Result<Vec<u8>> {
Ok(self
.inner
.screenshot(CaptureScreenshotParams::default())
.await?)
}
/// Performs a left click at page coordinates (`x`, `y`).
///
/// The sequence is: move the pointer there, pause roughly 20-60 ms, press the
/// left button, pause roughly 40-120 ms, then release it.
///
/// # Errors
/// Stops and returns the error as soon as any of the three events fails.
pub async fn click(&self, x: f64, y: f64) -> Result<()> {
self.inner
.execute(DispatchMouseEventParams::new(
DispatchMouseEventType::MouseMoved,
x,
y,
))
.await?;
// Short randomized pause between arriving at the target and pressing.
tokio::time::sleep(jitter(20, 60)).await;
let mut press =
DispatchMouseEventParams::new(DispatchMouseEventType::MousePressed, x, y);
press.button = Some(MouseButton::Left);
press.click_count = Some(1);
self.inner.execute(press).await?;
// Randomized hold time before the button is released.
tokio::time::sleep(jitter(40, 120)).await;
let mut release =
DispatchMouseEventParams::new(DispatchMouseEventType::MouseReleased, x, y);
release.button = Some(MouseButton::Left);
release.click_count = Some(1);
self.inner.execute(release).await?;
Ok(())
}
/// Types `text` one character at a time.
///
/// For each character a key-down event (carrying the character as both `text`
/// and `key`) and a key-up event are dispatched, followed by a randomized
/// pause of roughly 30-130 ms.
///
/// # Errors
/// Stops at the first event that fails; earlier characters remain typed.
pub async fn type_text(&self, text: &str) -> Result<()> {
for ch in text.chars() {
let s = ch.to_string();
let mut down = DispatchKeyEventParams::new(DispatchKeyEventType::KeyDown);
down.text = Some(s.clone());
down.key = Some(s.clone());
self.inner.execute(down).await?;
let mut up = DispatchKeyEventParams::new(DispatchKeyEventType::KeyUp);
up.key = Some(s);
self.inner.execute(up).await?;
tokio::time::sleep(jitter(30, 130)).await;
}
Ok(())
}
/// Dispatches a mouse-wheel event with vertical delta `delta_y`.
///
/// The event is always positioned at the fixed point (400, 300) with a
/// horizontal delta of zero.
pub async fn scroll(&self, delta_y: f64) -> Result<()> {
let mut ev =
DispatchMouseEventParams::new(DispatchMouseEventType::MouseWheel, 400.0, 300.0);
ev.delta_x = Some(0.0);
ev.delta_y = Some(delta_y);
self.inner.execute(ev).await?;
Ok(())
}
/// Asynchronously sleeps for `ms` milliseconds.
pub async fn wait(&self, ms: u64) {
tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
}
/// Closes the page, consuming this wrapper (which also drops the
/// simulation's cancel sender).
pub async fn close(self) -> Result<()> {
self.inner.close().await?;
Ok(())
}
/// Borrows the underlying chromiumoxide page for direct CDP access.
pub fn cdp(&self) -> &CdpPage {
&self.inner
}
}
async fn human_loop(
page: CdpPage,
mut cancel: tokio::sync::watch::Receiver<bool>,
vw: f64,
vh: f64,
) {
let mut rng = FastRng::new(nanos() ^ 0xDEAD_BEEF);
let mut mouse_x: f64 = vw / 2.0;
let mut mouse_y: f64 = vh / 2.0;
loop {
if *cancel.borrow() {
break;
}
let roll = rng.next_range(100);
if roll < 60 {
let tx = rng.next_f64() * (vw - 40.0) + 20.0;
let ty = rng.next_f64() * (vh - 40.0) + 20.0;
if bezier_move(&page, &mut cancel, &mut rng, mouse_x, mouse_y, tx, ty)
.await
.is_err()
{
break;
}
mouse_x = tx;
mouse_y = ty;
} else if roll < 85 {
let direction = if rng.next_range(100) < 80 { 1.0 } else { -1.0 };
let amount = (rng.next_range(200) as f64 + 50.0) * direction;
let mut ev = DispatchMouseEventParams::new(
DispatchMouseEventType::MouseWheel,
mouse_x,
mouse_y,
);
ev.delta_x = Some(0.0);
ev.delta_y = Some(amount);
let _ = page.execute(ev).await;
} else {
}
let delay = rng.next_range(3200) as u64 + 800;
tokio::select! {
_ = tokio::time::sleep(Duration::from_millis(delay)) => {}
_ = cancel.changed() => break,
}
}
}
async fn bezier_move(
page: &CdpPage,
cancel: &mut tokio::sync::watch::Receiver<bool>,
rng: &mut FastRng,
sx: f64,
sy: f64,
tx: f64,
ty: f64,
) -> std::result::Result<(), ()> {
let cx = (sx + tx) / 2.0 + (rng.next_f64() - 0.5) * 200.0;
let cy = (sy + ty) / 2.0 + (rng.next_f64() - 0.5) * 200.0;
let steps = rng.next_range(10) + 8; for i in 1..=steps {
if *cancel.borrow() {
return Err(());
}
let t = i as f64 / steps as f64;
let inv = 1.0 - t;
let x = inv * inv * sx + 2.0 * inv * t * cx + t * t * tx;
let y = inv * inv * sy + 2.0 * inv * t * cy + t * t * ty;
let ev =
DispatchMouseEventParams::new(DispatchMouseEventType::MouseMoved, x, y);
let _ = page.execute(ev).await;
let step_delay = rng.next_range(17) as u64 + 8;
tokio::select! {
_ = tokio::time::sleep(Duration::from_millis(step_delay)) => {}
_ = cancel.changed() => return Err(()),
}
}
Ok(())
}
/// Small, fast xorshift64 pseudo-random generator (shift triple 13 / 7 / 17).
///
/// Deterministic for a given seed and not suitable for anything
/// security-sensitive; it only drives timing and motion randomness.
struct FastRng {
    /// Current generator state; never zero.
    state: u64,
}

impl FastRng {
    /// Creates a generator from `seed`.
    ///
    /// Zero is a fixed point of xorshift (it would yield zeros forever), so a
    /// zero seed is replaced with a fixed non-zero constant.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 {
            0x1234_5678_9ABC_DEF0
        } else {
            seed
        };
        Self { state }
    }

    /// Advances the generator and returns the next 64-bit value.
    fn next(&mut self) -> u64 {
        let mut s = self.state;
        s ^= s << 13;
        s ^= s >> 7;
        s ^= s << 17;
        self.state = s;
        s
    }

    /// Returns a value in `0..max`.
    ///
    /// When `max` is zero the range is empty; `0` is returned instead of
    /// panicking on a remainder-by-zero. The generator is advanced either way.
    fn next_range(&mut self, max: u32) -> u32 {
        self.next()
            .checked_rem(u64::from(max))
            // The remainder is strictly below `max`, so it always fits in `u32`.
            .map_or(0, |rem| rem as u32)
    }

    /// Returns a value in `[0, 1)` built from the low 52 bits of the next output.
    fn next_f64(&mut self) -> f64 {
        (self.next() & 0x000F_FFFF_FFFF_FFFF) as f64 / (0x0010_0000_0000_0000u64 as f64)
    }
}
/// Returns the sub-second nanosecond component of the current wall-clock time.
///
/// Used as a cheap entropy source. If the system clock is set before the Unix
/// epoch, the distance to the epoch is used instead of panicking.
fn nanos() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_else(|err| err.duration());
    u64::from(since_epoch.subsec_nanos())
}

/// Returns a pseudo-random duration in the half-open range
/// `min_ms..max_ms` milliseconds.
///
/// When the range is empty (`max_ms <= min_ms`) exactly `min_ms` is returned,
/// rather than panicking on a remainder-by-zero or a subtraction underflow.
fn jitter(min_ms: u64, max_ms: u64) -> std::time::Duration {
    let span = max_ms.saturating_sub(min_ms);
    let extra = if span == 0 { 0 } else { nanos() % span };
    // `extra < span`, so `min_ms + extra < max_ms` and the sum cannot overflow.
    std::time::Duration::from_millis(min_ms + extra)
}