use ferrisfetcher::{FerrisFetcherBuilder, ExtractionRuleBuilder, ExtractionType};
use std::time::Duration;
/// Walkthrough of `ferrisfetcher`'s builder-pattern API: simple builder
/// chains, advanced configuration with extraction rules, proxy setup,
/// a live scrape, and a small multi-URL timing comparison.
///
/// Returns `Ok(())` on completion; any builder or header error propagates
/// out via `?` as `Box<dyn std::error::Error>`.
///
/// NOTE(review): the emoji below were reconstructed from mojibake in the
/// original source (UTF-8 decoded as windows-874). The ✅/⚠️/❌/🏗️/🧪/🎯
/// glyphs are unambiguous from the surviving bytes; 🌐/🔍/🚀/📊/📋 are
/// best-effort guesses — confirm against the upstream example if exact
/// output matters.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Install the default tracing subscriber so crate-internal logs surface.
    tracing_subscriber::fmt::init();

    // Example 1: minimal builder chain — fluent setters plus a fallible
    // `header` call (invalid header names/values error out via `?`).
    println!("🏗️ Example 1: Simple builder configuration");
    let fetcher = FerrisFetcherBuilder::new()
        .user_agent("BuilderExample/1.0")
        .timeout(Duration::from_secs(10))
        .max_concurrent_requests(5)
        .without_rate_limit()
        .header("X-Custom-Header", "test-value")?
        .build()?;
    println!("✅ Created fetcher with custom configuration");
    println!(" User Agent: {}", fetcher.config().user_agent);
    println!(" Timeout: {:?}", fetcher.config().timeout);
    println!(" Max Concurrent: {}", fetcher.config().max_concurrent_requests);
    println!(" Rate Limiting: {:?}", fetcher.config().rate_limit);

    // Example 2: a full `Config` object (with retry policy) plus three
    // named CSS-selector extraction rules and extra request headers.
    println!("\n🏗️ Example 2: Advanced configuration with extraction rules");
    let advanced_fetcher = FerrisFetcherBuilder::new()
        .config(
            ferrisfetcher::Config::new()
                .with_user_agent("AdvancedScraper/2.0")
                .with_timeout(Duration::from_secs(30))
                .with_max_concurrent_requests(10)
                .with_retry_policy(ferrisfetcher::RetryPolicy {
                    max_attempts: 5,
                    base_delay: Duration::from_millis(500),
                    max_delay: Duration::from_secs(10),
                    exponential_backoff: true,
                    backoff_multiplier: 1.5,
                }),
        )
        // Collect all heading text.
        .add_rule(
            ExtractionRuleBuilder::new("titles", "h1, h2, h3")
                .extraction_type(ExtractionType::Text)
                .multiple(true)
                .build(),
        )
        // Collect hrefs of absolute (http*) links.
        .add_rule(
            ExtractionRuleBuilder::new("external_links", "a[href^='http']")
                .extraction_type(ExtractionType::Attribute)
                .attribute("href")
                .multiple(true)
                .build(),
        )
        // Collect image sources.
        .add_rule(
            ExtractionRuleBuilder::new("images", "img[src]")
                .extraction_type(ExtractionType::Attribute)
                .attribute("src")
                .multiple(true)
                .build(),
        )
        .header("Accept-Language", "en-US,en;q=0.9")?
        .header("DNT", "1")?
        .build()?;
    println!("✅ Created advanced fetcher with extraction rules");
    println!(" Extraction Rules: {}", advanced_fetcher.extraction_rules().len());
    for rule_name in advanced_fetcher.extraction_rules().keys() {
        println!(" - {}", rule_name);
    }

    // Example 3: proxy configuration. Building may legitimately fail if the
    // proxy is unreachable, so the result is matched rather than unwrapped.
    println!("\n🏗️ Example 3: Configuration with proxy (if available)");
    if let Ok(proxy_url) = url::Url::parse("http://proxy.example.com:8080") {
        let proxy_fetcher = FerrisFetcherBuilder::new()
            .user_agent("ProxyScraper/1.0")
            .proxy(proxy_url)
            .without_cookies()
            .without_redirects()
            .build();
        match proxy_fetcher {
            Ok(fetcher) => {
                println!("✅ Created fetcher with proxy configuration");
                println!(" Proxy: {:?}", fetcher.config().proxy);
                println!(" Cookies: {}", fetcher.config().cookie_jar);
                println!(" Redirects: {}", fetcher.config().follow_redirects);
            }
            Err(e) => {
                println!("⚠️ Proxy configuration failed (expected if proxy is unavailable): {}", e);
            }
        }
    } else {
        println!("⚠️ Invalid proxy URL format");
    }

    // Example 4: run the advanced fetcher against a real URL and report
    // the extracted fields (truncating long value lists to three entries).
    println!("\n🧪 Example 4: Testing the configured fetcher");
    let test_url = "https://example.com";
    println!("🌐 Testing with URL: {}", test_url);
    match advanced_fetcher.scrape(test_url).await {
        Ok(result) => {
            println!("✅ Scraping successful!");
            println!(" Title: {:?}", result.title);
            println!(" Status: {}", result.status_code);
            println!(" Content Length: {} bytes", result.content.len());
            println!("\n📊 Extracted Data:");
            for (field, values) in &result.extracted_data {
                println!(" {}: {} values", field, values.len());
                if values.len() <= 3 {
                    for value in values {
                        println!(" - {}", value);
                    }
                } else {
                    for value in values.iter().take(3) {
                        println!(" - {}", value);
                    }
                    println!(" ... and {} more", values.len() - 3);
                }
            }
            println!("\n📋 Metadata:");
            for (key, value) in &result.metadata {
                // Skip the bulky JSON-LD payload to keep console output readable.
                if key != "json_ld" {
                    println!(" {}: {}", key, value);
                }
            }
        }
        Err(e) => {
            eprintln!("❌ Scraping failed: {}", e);
        }
    }

    // Example 5: rough wall-clock comparison of default vs. advanced
    // configuration over the same URL list (informal — network-dominated).
    println!("\n🚀 Example 5: Performance comparison");
    let urls = vec![
        "https://example.com",
        "https://example.org",
        "https://example.net",
    ];
    println!("🔍 Testing with default configuration...");
    let default_fetcher = ferrisfetcher::FerrisFetcher::new()?;
    let start = std::time::Instant::now();
    match default_fetcher.scrape_multiple(&urls).await {
        Ok(results) => {
            let default_time = start.elapsed();
            println!(" ✅ Default: {} URLs in {:?}", results.len(), default_time);
        }
        Err(e) => {
            println!(" ❌ Default failed: {}", e);
        }
    }
    println!("🔍 Testing with optimized configuration...");
    let start = std::time::Instant::now();
    match advanced_fetcher.scrape_multiple(&urls).await {
        Ok(results) => {
            let optimized_time = start.elapsed();
            println!(" ✅ Optimized: {} URLs in {:?}", results.len(), optimized_time);
        }
        Err(e) => {
            println!(" ❌ Optimized failed: {}", e);
        }
    }

    println!("\n🎯 Builder pattern examples completed!");
    Ok(())
}