spider-lib 3.0.4

A Rust web-scraping framework inspired by Scrapy (Python).

Example from the documentation — a minimal one-page spider:
use spider_lib::prelude::*;

// Item produced for each scraped page: the first <h1> text (or a fallback),
// the response URL, and the HTTP status code.
// NOTE(review): `#[scraped_item]` presumably derives the serialization /
// pipeline plumbing spider-lib needs — confirm against the crate docs.
#[scraped_item]
struct MinimalItem {
    title: String,     // trimmed `h1::text`, or "Example Domain" when absent
    url: String,       // response URL as a string
    status: u16,       // HTTP status code of the response
    has_heading: bool, // true when the extracted heading text is non-empty
}

// Stateless unit-struct spider; all behaviour lives in its `Spider` impl.
struct MinimalSpider;

#[async_trait]
impl Spider for MinimalSpider {
    type Item = MinimalItem;
    type State = ();

    /// Seed the crawl with a single start URL.
    fn start_requests(&self) -> Result<StartRequests<'_>, SpiderError> {
        Ok(StartRequests::Urls(vec!["https://example.com/"]))
    }

    /// Parse one response into exactly one `MinimalItem`.
    ///
    /// # Errors
    /// Propagates any `SpiderError` raised by the `css` selector lookup.
    async fn parse(
        &self,
        response: Response,
        _state: &Self::State,
    ) -> Result<ParseOutput<Self::Item>, SpiderError> {
        let mut output = ParseOutput::new();

        // First `<h1>` text, trimmed; fall back to "Example Domain" when the
        // selector matches nothing. `unwrap_or_else` keeps the fallback
        // allocation lazy.
        let heading = response
            .css("h1::text")?
            .get()
            .unwrap_or_else(|| "Example Domain".to_string())
            .trim()
            .to_string();

        // Computed before `heading` is moved into the item, which removes
        // the `heading.clone()` the original needed only to satisfy the move.
        let has_heading = !heading.is_empty();

        output.add_item(MinimalItem {
            title: heading,
            url: response.url.to_string(),
            status: response.status.as_u16(),
            has_heading,
        });

        Ok(output)
    }
}

/// Entry point: configure a crawler over `MinimalSpider` (one page, info-level
/// logging), build it, and run the crawl to completion.
#[tokio::main]
async fn main() -> Result<(), SpiderError> {
    // Configure first, then build and run — same call order as before.
    let builder = CrawlerBuilder::new(MinimalSpider)
        .limit(1)
        .log_level(log::LevelFilter::Info);

    let crawler = builder.build().await?;
    crawler.start_crawl().await
}