pub struct BlessCrawl { /* private fields */ }
BlessCrawl client for distributed web scraping operations.
Implementations

impl BlessCrawl
pub const DEFAULT_TIMEOUT_MS: u32 = 15_000u32
Default timeout in milliseconds (15 seconds).

pub const DEFAULT_WAIT_TIME_MS: u32 = 3_000u32
Default wait time in milliseconds (3 seconds).

pub const MAX_TIMEOUT_MS: u32 = 120_000u32
Maximum timeout in milliseconds (2 minutes).

pub const MAX_WAIT_TIME_MS: u32 = 20_000u32
Maximum wait time in milliseconds (20 seconds).

pub const MAX_SCRAPE_BUFFER_SIZE: usize = 2_097_152usize
Maximum scrape result buffer size in bytes (2 MB).

pub const MAX_MAP_BUFFER_SIZE: usize = 1_048_576usize
Maximum map result buffer size in bytes (1 MB).

pub const MAX_CRAWL_BUFFER_SIZE: usize = 8_388_608usize
Maximum crawl result buffer size in bytes (8 MB).
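Callers can clamp their own timing values against these caps before issuing a request. A minimal sketch, assuming hypothetical with_timeout/with_wait_time builder methods on ScrapeOptions (the real builder names may differ):

// Sketch: clamp user-supplied timings to the documented caps.
// with_timeout and with_wait_time are assumed builder methods on
// ScrapeOptions; verify the actual API before relying on them.
let requested_timeout_ms: u32 = 300_000; // exceeds MAX_TIMEOUT_MS
let options = ScrapeOptions::default()
    .with_timeout(requested_timeout_ms.min(BlessCrawl::MAX_TIMEOUT_MS))
    .with_wait_time(5_000u32.min(BlessCrawl::MAX_WAIT_TIME_MS));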
pub fn with_config(config: ScrapeOptions) -> Result<Self, WebScrapeErrorKind>
Creates a new BlessCrawl instance with the given configuration.
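A minimal construction sketch; since with_config returns a Result, the failure case must be handled. ScrapeOptions::default() is an assumption here:

// Sketch: build a client from an explicit configuration and read it back.
// ScrapeOptions::default() is an assumption; the real crate may expose a
// new() constructor or builder instead.
let config = ScrapeOptions::default();
let client = BlessCrawl::with_config(config).expect("configuration rejected");
let current: &ScrapeOptions = client.get_config();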
pub fn get_config(&self) -> &ScrapeOptions
Returns a reference to the current configuration.
pub fn handle(&self) -> u32
pub fn scrape(
    &self,
    url: &str,
    options: Option<ScrapeOptions>,
) -> Result<Response<ScrapeData>, WebScrapeErrorKind>
Scrapes webpage content and returns it as markdown with metadata.
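Passing Some(options) presumably overrides the client-level configuration for that single call, while None falls back to the defaults (as in the repository example below). A minimal sketch with explicit error handling; the with_timeout builder on ScrapeOptions is hypothetical:

// Sketch: per-call options with explicit error handling.
// with_timeout is a hypothetical builder method on ScrapeOptions.
let client = BlessCrawl::default();
let options = ScrapeOptions::default().with_timeout(BlessCrawl::DEFAULT_TIMEOUT_MS);
match client.scrape("https://example.com", Some(options)) {
    Ok(response) => println!("{}", response.data.content),
    Err(err) => eprintln!("scrape failed: {:?}", err),
}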
Examples found in repository
examples/web-scrape.rs (line 27)

fn example_scraping() {
    println!("--- Example 1: Basic Web Scraping ---");

    let url = "https://example.com";
    println!("scraping: {}...", url);

    // First scrape with default config
    let response = BlessCrawl::default()
        .scrape(url, None)
        .expect("Failed to scrape");
    println!("response with default config: {:?}", response);
    println!();
    println!(
        "---------- markdown ----------\n{}\n------------------------------",
        response.data.content
    );
}

pub fn map(
    &self,
    url: &str,
    options: Option<MapOptions>,
) -> Result<Response<MapData>, WebScrapeErrorKind>
Extracts all links from a webpage, categorized by type.
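A minimal call with default options; the response exposes the categorized links and a total count (field names follow the repository example below):

// Sketch: map with default options.
let response = BlessCrawl::default()
    .map("https://example.com", None)
    .expect("Failed to map");
println!("found {} links", response.data.total_links);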
Examples found in repository
examples/web-scrape.rs (line 49)

fn example_mapping() {
    println!("--- Example 2: Link Mapping/Discovery ---");

    let url = "https://example.com";
    println!("Mapping links from: {}", url);

    let options = MapOptions::new()
        .with_link_types(vec!["internal".to_string(), "external".to_string()])
        .with_base_url(url.to_string())
        .with_filter_extensions(vec![".html".to_string(), ".htm".to_string()]);

    let response = BlessCrawl::default()
        .map(url, Some(options))
        .expect("Failed to map");
    println!("response: {:?}", response);
    println!();
    println!(
        "------------ links ------------\n{:?}\n------------------------------",
        response.data.links
    );
    println!();
    println!(
        "------------ total links ------------\n{}\n------------------------------",
        response.data.total_links
    );
}

pub fn crawl(
    &self,
    url: &str,
    options: Option<CrawlOptions>,
) -> Result<Response<CrawlData<ScrapeData>>, WebScrapeErrorKind>
Recursively crawls a website with configurable depth and filtering.
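Because crawl results are buffered (see MAX_CRAWL_BUFFER_SIZE above), it is sensible to bound depth and page count. A minimal sketch using the builder methods shown in the repository example below:

// Sketch: a small, bounded crawl.
let options = CrawlOptions::new()
    .with_max_depth(1)
    .with_limit(5);
let response = BlessCrawl::default()
    .crawl("https://example.com", Some(options))
    .expect("Failed to crawl");
println!("crawled {} pages", response.data.total_pages);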
Examples found in repository
examples/web-scrape.rs (line 80)

fn example_crawling() {
    println!("--- Example 3: Recursive Website Crawling ---");

    let url = "https://example.com";
    println!("Crawling website: {}", url);

    let options = CrawlOptions::new()
        .with_max_depth(2)
        .with_limit(10)
        .with_include_paths(vec!["/".to_string()])
        .with_exclude_paths(vec!["/admin/".to_string(), "/api/".to_string()])
        .with_follow_external(false)
        .with_delay_between_requests(1000)
        .with_parallel_requests(3);

    let response = BlessCrawl::default()
        .crawl(url, Some(options))
        .expect("Failed to crawl");
    println!("response: {:?}", response);
    println!();
    println!(
        "------------ pages ------------\n{:?}\n------------------------------",
        response.data.pages
    );
    println!();
    println!(
        "------------ total pages ------------\n{}\n------------------------------",
        response.data.total_pages
    );
}

Trait Implementations
impl Clone for BlessCrawl

fn clone(&self) -> BlessCrawl
Returns a duplicate of the value.

fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source.

impl Debug for BlessCrawl

impl Default for BlessCrawl

fn default() -> BlessCrawl
Returns the “default value” for a type.
Auto Trait Implementations
impl Freeze for BlessCrawl
impl RefUnwindSafe for BlessCrawl
impl Send for BlessCrawl
impl Sync for BlessCrawl
impl Unpin for BlessCrawl
impl UnwindSafe for BlessCrawl
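Since BlessCrawl is Clone, Send, and Sync (see above), a client can be duplicated and moved across threads. A minimal sketch:

// Sketch: move a cloned client into a worker thread (Clone + Send + Sync).
let client = BlessCrawl::default();
let worker = std::thread::spawn({
    let client = client.clone();
    move || {
        let response = client.scrape("https://example.com", None);
        println!("scrape finished: ok = {}", response.is_ok());
    }
});
worker.join().expect("worker thread panicked");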
Blanket Implementations

impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.