scrapling_browser/lib.rs
1//! Browser automation crate for the scrapling-rs web scraping framework.
2//!
3//! This crate provides high-level browser automation built on top of Playwright, giving
4//! you two session types for fetching fully-rendered web pages:
5//!
6//! - [`DynamicSession`] -- a standard Playwright-driven browser that executes JavaScript,
7//!   waits for network activity to settle, and returns the final DOM. Use this when the
8//!   target site does not employ bot detection.
9//!
10//! - [`StealthySession`] -- extends `DynamicSession` with anti-detection measures such as
11//!   WebRTC leak prevention, canvas fingerprint noise, automation-flag removal, and an
12//!   automatic Cloudflare Turnstile solver. Use this when sites actively block headless
13//!   browsers.
14//!
15//! # Architecture overview
16//!
17//! ```text
18//!                   ┌──────────────┐
19//!                   │  Your code   │
20//!                   └──────┬───────┘
21//!                          │ .fetch(url)
22//!        ┌─────────────────┴─────────────────┐
23//!        │ DynamicSession / StealthySession  │  (fetcher.rs)
24//!        └─────────────────┬─────────────────┘
25//!                          │
26//!        ┌─────────────────┼─────────────────┐
27//!        ▼                 ▼                 ▼
28//!    engine.rs       intercept.rs      page_pool.rs
29//!  (launch opts)  (request blocking)  (page tracking)
30//!        │                 │
31//!        ▼                 ▼
32//!  constants.rs      ad_domains.rs
33//!   (CLI flags)    (blocklist data)
34//! ```
35//!
36//! Configuration starts with [`BrowserConfig`] (or [`StealthConfig`] for stealth sessions).
37//! Per-request overrides are expressed via [`FetchParams`], which are merged with the
38//! session-level config into [`ResolvedFetchParams`] before each navigation.
39//!
40//! After navigation completes, the [`response_factory`] module extracts the page's HTML,
41//! status code, headers, and cookies into a unified [`scrapling_fetch::Response`] that the
42//! rest of the scrapling pipeline can parse and query.
43//!
44//! # Quick example
45//!
46//! ```rust,no_run
47//! use scrapling_browser::{BrowserConfig, DynamicSession};
48//!
49//! # async fn run() -> scrapling_browser::Result<()> {
50//! let config = BrowserConfig {
51//!     headless: true,
52//!     disable_resources: true,
53//!     ..Default::default()
54//! };
55//!
56//! let mut session = DynamicSession::new(config)?;
57//! session.start().await?;
58//!
59//! let response = session.fetch("https://example.com", None).await?;
60//! println!("status: {}", response.status);
61//!
62//! session.close().await?;
63//! # Ok(())
64//! # }
65//! ```
66
67pub mod ad_domains; // blocklist data (per the crate-level architecture overview)
68pub mod config; // session-level configs and per-request `FetchParams` types
69pub mod constants; // CLI flags (per the crate-level architecture overview)
70pub mod engine; // browser launch options
71pub mod error; // `BrowserError` and the crate's `Result` type
72pub mod fetcher; // `DynamicSession` / `StealthySession`, the `.fetch(url)` entry points
73pub mod intercept; // request blocking
74pub mod page_pool; // page tracking (`PagePool`, `PageState`, `PoolStats`)
75pub mod response_factory; // extracts HTML, status, headers, cookies into `scrapling_fetch::Response`
76
77pub use config::{
78 BrowserConfig, CookieParam, FetchParams, ProxyConfig, ResolvedFetchParams, StealthConfig,
79 WaitState,
80}; // configuration surface re-exported at the crate root so callers need not name `config`
81pub use error::{BrowserError, Result}; // `Result` is presumably an alias over `BrowserError` -- confirm in error.rs
82pub use fetcher::{DynamicSession, StealthySession}; // the two session types described in the crate docs
83pub use page_pool::{PagePool, PageState, PoolStats}; // page-tracking types from `page_pool`