use ferrisfetcher::{
FerrisFetcher, Config,
ExtractionRuleBuilder, ExtractionType,
presets
};
use std::time::Duration;
/// Demonstrates two scraping flows: one driven by the `presets::article()`
/// rule set and one driven by hand-built extraction rules.
///
/// Errors from constructing either fetcher are propagated with `?`. A failed
/// scrape is only reported on stderr; execution continues and the program
/// still returns `Ok(())`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Maximum number of extracted links echoed before summarising the rest.
    const PREVIEW_LIMIT: usize = 5;

    tracing_subscriber::fmt::init();

    // --- Part 1: preset article rules ------------------------------------
    let config = Config::new()
        .with_user_agent("ArticleScraper/1.0")
        .with_timeout(Duration::from_secs(15));
    let article_rules = presets::article();
    let fetcher = FerrisFetcher::with_config_and_rules(config, article_rules)?;

    let article_url = "https://example.com/news/article";
    println!("🔍 Extracting article data from: {}", article_url);

    match fetcher.scrape(article_url).await {
        Err(err) => eprintln!("❌ Failed to extract data: {}", err),
        Ok(page) => {
            println!("✅ Successfully extracted article data!");

            println!("\n📄 Page Information:");
            println!(" Title: {:?}", page.title);
            println!(" URL: {}", page.url);
            println!(" Status: {}", page.status_code);

            println!("\n📊 Extracted Article Data:");
            for (field, extracted) in &page.extracted_data {
                println!(" {}: {:?}", field, extracted);
            }

            println!("\n🔍 Metadata:");
            for (name, entry) in &page.metadata {
                println!(" {}: {}", name, entry);
            }
        }
    }

    // --- Part 2: hand-built extraction rules -----------------------------
    println!("\n🛠️ Custom extraction example:");
    let mut custom_fetcher = FerrisFetcher::new()?;

    // Text of every h1/h2/h3 element.
    let heading_rule = ExtractionRuleBuilder::new("headings", "h1, h2, h3")
        .extraction_type(ExtractionType::Text)
        .multiple(true)
        .build();
    custom_fetcher.add_extraction_rule(heading_rule);

    // `href` attribute of every anchor that carries one.
    let link_rule = ExtractionRuleBuilder::new("links", "a[href]")
        .extraction_type(ExtractionType::Attribute)
        .attribute("href")
        .multiple(true)
        .build();
    custom_fetcher.add_extraction_rule(link_rule);

    match custom_fetcher.scrape("https://example.com").await {
        Err(err) => eprintln!("❌ Custom extraction failed: {}", err),
        Ok(page) => {
            println!("✅ Custom extraction successful!");

            if let Some(headings) = page.extracted_data.get("headings") {
                println!("\n📝 Found {} headings:", headings.len());
                // Pair each heading with a 1-based ordinal for display.
                for (ordinal, heading) in (1usize..).zip(headings.iter()) {
                    println!(" {}. {}", ordinal, heading);
                }
            }

            if let Some(links) = page.extracted_data.get("links") {
                let total = links.len();
                println!("\n🔗 Found {} links:", total);
                for (ordinal, link) in (1usize..).zip(links.iter().take(PREVIEW_LIMIT)) {
                    println!(" {}. {}", ordinal, link);
                }
                // Only mention the remainder when some links were not shown.
                let hidden = total.saturating_sub(PREVIEW_LIMIT);
                if hidden > 0 {
                    println!(" ... and {} more", hidden);
                }
            }
        }
    }

    Ok(())
}