Struct BlessCrawl

Source
pub struct BlessCrawl { /* private fields */ }
Expand description

BlessCrawl client for distributed web scraping operations.

Implementations§

Source§

impl BlessCrawl

Source

pub const DEFAULT_TIMEOUT_MS: u32 = 15_000u32

Default timeout in milliseconds (15 seconds)

Source

pub const DEFAULT_WAIT_TIME_MS: u32 = 3_000u32

Default wait time in milliseconds (3 seconds)

Source

pub const MAX_TIMEOUT_MS: u32 = 120_000u32

Maximum timeout in milliseconds (2 minutes)

Source

pub const MAX_WAIT_TIME_MS: u32 = 20_000u32

Maximum wait time in milliseconds (20 seconds)

Source

pub const MAX_SCRAPE_BUFFER_SIZE: usize = 2_097_152usize

Maximum result buffer size in bytes for scrape operations (2 MiB)

Source

pub const MAX_MAP_BUFFER_SIZE: usize = 1_048_576usize

Maximum result buffer size in bytes for map operations (1 MiB)

Source

pub const MAX_CRAWL_BUFFER_SIZE: usize = 8_388_608usize

Maximum result buffer size in bytes for crawl operations (8 MiB)

Source

pub fn with_config(config: ScrapeOptions) -> Result<Self, WebScrapeErrorKind>

Creates a new BlessCrawl instance with the given configuration.

Source

pub fn get_config(&self) -> &ScrapeOptions

Returns a reference to the current configuration.

Source

pub fn handle(&self) -> u32

Source

pub fn scrape( &self, url: &str, options: Option<ScrapeOptions>, ) -> Result<Response<ScrapeData>, WebScrapeErrorKind>

Scrapes webpage content and returns it as markdown with metadata.

Examples found in repository?
examples/web-scrape.rs (line 27)
19 fn example_scraping() {
20    println!("--- Example 1: Basic Web Scraping ---");
21
22    let url = "https://example.com";
23    println!("scraping: {}...", url);
24
25    // First scrape with default config
26    let response = BlessCrawl::default()
27        .scrape(url, None)
28        .expect("Failed to scrape");
29    println!("response with default config: {:?}", response);
30    println!();
31    println!(
32        "---------- markdown ----------\n{}\n------------------------------",
33        response.data.content
34    );
35}
Source

pub fn map( &self, url: &str, options: Option<MapOptions>, ) -> Result<Response<MapData>, WebScrapeErrorKind>

Extracts all links from a webpage, categorized by type.

Examples found in repository?
examples/web-scrape.rs (line 49)
37 fn example_mapping() {
38    println!("--- Example 2: Link Mapping/Discovery ---");
39
40    let url = "https://example.com";
41    println!("Mapping links from: {}", url);
42
43    let options = MapOptions::new()
44        .with_link_types(vec!["internal".to_string(), "external".to_string()])
45        .with_base_url(url.to_string())
46        .with_filter_extensions(vec![".html".to_string(), ".htm".to_string()]);
47
48    let response = BlessCrawl::default()
49        .map(url, Some(options))
50        .expect("Failed to map");
51    println!("response: {:?}", response);
52    println!();
53    println!(
54        "------------ links ------------\n{:?}\n------------------------------",
55        response.data.links
56    );
57    println!();
58    println!(
59        "------------ total links ------------\n{}\n------------------------------",
60        response.data.total_links
61    );
62}
Source

pub fn crawl( &self, url: &str, options: Option<CrawlOptions>, ) -> Result<Response<CrawlData<ScrapeData>>, WebScrapeErrorKind>

Recursively crawls a website with configurable depth and filtering.

Examples found in repository?
examples/web-scrape.rs (line 80)
64 fn example_crawling() {
65    println!("--- Example 3: Recursive Website Crawling ---");
66
67    let url = "https://example.com";
68    println!("Crawling website: {}", url);
69
70    let options = CrawlOptions::new()
71        .with_max_depth(2)
72        .with_limit(10)
73        .with_include_paths(vec!["/".to_string()])
74        .with_exclude_paths(vec!["/admin/".to_string(), "/api/".to_string()])
75        .with_follow_external(false)
76        .with_delay_between_requests(1000)
77        .with_parallel_requests(3);
78
79    let response = BlessCrawl::default()
80        .crawl(url, Some(options))
81        .expect("Failed to crawl");
82    println!("response: {:?}", response);
83    println!();
84    println!(
85        "------------ pages ------------\n{:?}\n------------------------------",
86        response.data.pages
87    );
88    println!();
89    println!(
90        "------------ total pages ------------\n{}\n------------------------------",
91        response.data.total_pages
92    );
93}

Trait Implementations§

Source§

impl Clone for BlessCrawl

Source§

fn clone(&self) -> BlessCrawl

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for BlessCrawl

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for BlessCrawl

Source§

fn default() -> BlessCrawl

Returns the “default value” for a type. Read more
Source§

impl Drop for BlessCrawl

Source§

fn drop(&mut self)

Executes the destructor for this type. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> ErasedDestructor for T
where T: 'static,