web_scrape/
web-scrape.rs

use blockless_sdk::*;

/// This example demonstrates how to use the Blockless SDK to perform web scraping
/// using the BlessCrawl functionality.
///
/// It shows how to:
/// - Create a BlessCrawl instance with default configuration
/// - Scrape content from a single URL
/// - Map links from a webpage to discover available URLs
/// - Crawl a website recursively with custom crawl options
/// - Handle errors and responses appropriately
fn main() {
    println!("=== Blockless Web Scraping SDK Example ===\n");

    example_scraping();
    example_mapping();
    example_crawling();
}

fn example_scraping() {
    println!("--- Example 1: Basic Web Scraping ---");

    let url = "https://example.com";
    println!("scraping: {}...", url);

    // Scrape with default config
    let response = BlessCrawl::default()
        .scrape(url, None)
        .expect("Failed to scrape");
    println!("response with default config: {:?}", response);
    println!();
    println!(
        "---------- markdown ----------\n{}\n------------------------------",
        response.data.content
    );
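
    // The second argument to `scrape` is `None` above; it presumably accepts
    // per-request overrides, mirroring `map` and `crawl` below. A hedged sketch
    // of a scrape with overrides follows; `ScrapeOptions` and `with_timeout` are
    // hypothetical names modeled on the `MapOptions`/`CrawlOptions` builders, so
    // check the SDK for the actual type before uncommenting:
    //
    // let options = ScrapeOptions::new().with_timeout(10_000);
    // let response = BlessCrawl::default()
    //     .scrape(url, Some(options))
    //     .expect("Failed to scrape with overrides");
    // println!("response with overrides: {:?}", response);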
}

fn example_mapping() {
    println!("--- Example 2: Link Mapping/Discovery ---");

    let url = "https://example.com";
    println!("Mapping links from: {}", url);

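    // Discover both internal and external links, keep only HTML pages, and set the
    // base URL (presumably used to resolve relative links against `url`).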
    let options = MapOptions::new()
        .with_link_types(vec!["internal".to_string(), "external".to_string()])
        .with_base_url(url.to_string())
        .with_filter_extensions(vec![".html".to_string(), ".htm".to_string()]);

    let response = BlessCrawl::default()
        .map(url, Some(options))
        .expect("Failed to map");
    println!("response: {:?}", response);
    println!();
    println!(
        "------------ links ------------\n{:?}\n------------------------------",
        response.data.links
    );
    println!();
    println!(
        "------------ total links ------------\n{}\n------------------------------",
        response.data.total_links
    );
}

fn example_crawling() {
    println!("--- Example 3: Recursive Website Crawling ---");

    let url = "https://example.com";
    println!("Crawling website: {}", url);

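    // Crawl up to 2 levels deep and at most 10 pages, starting from the site root,
    // skipping /admin/ and /api/ paths and staying on the same host. The delay
    // between requests is 1000 (presumably milliseconds; check the SDK docs) with
    // up to 3 requests in flight at a time.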
    let options = CrawlOptions::new()
        .with_max_depth(2)
        .with_limit(10)
        .with_include_paths(vec!["/".to_string()])
        .with_exclude_paths(vec!["/admin/".to_string(), "/api/".to_string()])
        .with_follow_external(false)
        .with_delay_between_requests(1000)
        .with_parallel_requests(3);

    let response = BlessCrawl::default()
        .crawl(url, Some(options))
        .expect("Failed to crawl");
    println!("response: {:?}", response);
    println!();
    println!(
        "------------ pages ------------\n{:?}\n------------------------------",
        response.data.pages
    );
    println!();
    println!(
        "------------ total pages ------------\n{}\n------------------------------",
        response.data.total_pages
    );
}
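
// A hedged sketch of handling a failed scrape without panicking via `.expect()`.
// It is not wired into `main()` above. Assumption (not verified against the SDK):
// `scrape` returns a `Result` whose error type implements `Debug`, as the
// `.expect` calls above suggest.
#[allow(dead_code)]
fn example_error_handling() {
    let url = "https://example.com";
    match BlessCrawl::default().scrape(url, None) {
        Ok(response) => println!("scrape succeeded:\n{}", response.data.content),
        Err(err) => eprintln!("scrape failed: {:?}", err),
    }
}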