Skip to main content

scrapling_browser/
lib.rs

1//! Browser automation crate for the scrapling-rs web scraping framework.
2//!
3//! This crate provides high-level browser automation built on top of Playwright, giving
4//! you two session types for fetching fully-rendered web pages:
5//!
6//! - [`DynamicSession`] -- a standard Playwright-driven browser that executes JavaScript,
7//!   waits for network activity to settle, and returns the final DOM. Use this when the
8//!   target site does not employ bot detection.
9//!
10//! - [`StealthySession`] -- extends `DynamicSession` with anti-detection measures such as
11//!   WebRTC leak prevention, canvas fingerprint noise, automation-flag removal, and an
12//!   automatic Cloudflare Turnstile solver. Use this when sites actively block headless
13//!   browsers.
14//!
15//! # Architecture overview
16//!
17//! ```text
18//!                  ┌──────────────┐
19//!                  │  Your code   │
20//!                  └──────┬───────┘
21//!                         │ .fetch(url)
22//!      ┌──────────────────┴─────────────────┐
23//!      │  DynamicSession / StealthySession  │  (fetcher.rs)
24//!      └──────────────────┬─────────────────┘
25//!                         │
26//!       ┌─────────────────┼─────────────────┐
27//!       ▼                 ▼                 ▼
28//!   engine.rs        intercept.rs      page_pool.rs
29//!  (launch opts)   (request blocking)  (page tracking)
30//!       │                 │
31//!       ▼                 ▼
32//!   constants.rs     ad_domains.rs
33//!  (CLI flags)     (blocklist data)
34//! ```
35//!
36//! Configuration starts with [`BrowserConfig`] (or [`StealthConfig`] for stealth sessions).
37//! Per-request overrides are expressed via [`FetchParams`], which are merged with the
38//! session-level config into [`ResolvedFetchParams`] before each navigation.
39//!
40//! After navigation completes, the [`response_factory`] module extracts the page's HTML,
41//! status code, headers, and cookies into a unified [`scrapling_fetch::Response`] that the
42//! rest of the scrapling pipeline can parse and query.
43//!
44//! # Quick example
45//!
46//! ```rust,no_run
47//! use scrapling_browser::{BrowserConfig, DynamicSession};
48//!
49//! # async fn run() -> scrapling_browser::Result<()> {
50//! let config = BrowserConfig {
51//!     headless: true,
52//!     disable_resources: true,
53//!     ..Default::default()
54//! };
55//!
56//! let mut session = DynamicSession::new(config)?;
57//! session.start().await?;
58//!
59//! let response = session.fetch("https://example.com", None).await?;
60//! println!("status: {}", response.status);
61//!
62//! session.close().await?;
63//! # Ok(())
64//! # }
65//! ```
66
67pub mod ad_domains;
68pub mod config;
69pub mod constants;
70pub mod engine;
71pub mod error;
72pub mod fetcher;
73pub mod intercept;
74pub mod page_pool;
75pub mod response_factory;
76
77pub use config::{
78    BrowserConfig, CookieParam, FetchParams, ProxyConfig, ResolvedFetchParams, StealthConfig,
79    WaitState,
80};
81pub use error::{BrowserError, Result};
82pub use fetcher::{DynamicSession, StealthySession};
83pub use page_pool::{PagePool, PageState, PoolStats};