// spider_pipeline/console.rs
1//! Item Pipeline for writing scraped items to the console.
2//!
3//! This module provides the `ConsolePipeline`, a basic and useful
4//! item pipeline for debugging and immediate inspection of scraped data.
5//! When integrated into a crawler, this pipeline simply logs the received
6//! `ScrapedItem`s to the console (or configured tracing output).
7//!
8//! It serves as a straightforward way to verify that spiders are extracting
9//! data correctly and that items are flowing through the pipeline as expected.
10use crate::pipeline::Pipeline;
11use async_trait::async_trait;
12use log::info;
13use spider_util::{error::PipelineError, item::ScrapedItem};
14
/// A pipeline that prints scraped items to the console.
///
/// This is a stateless unit struct: each received item is logged at `info`
/// level and then forwarded unchanged to the next pipeline stage.
#[derive(Debug, Clone, Copy)]
pub struct ConsolePipeline;
17
18impl ConsolePipeline {
19    /// Creates a new `ConsolePipeline`.
20    pub fn new() -> Self {
21        Self
22    }
23}
24
25impl Default for ConsolePipeline {
26    fn default() -> Self {
27        Self::new()
28    }
29}
30
31#[async_trait]
32impl<I: ScrapedItem> Pipeline<I> for ConsolePipeline {
33    fn name(&self) -> &str {
34        "ConsolePipeline"
35    }
36
37    async fn process_item(&self, item: I) -> Result<Option<I>, PipelineError> {
38        info!("Pipeline processing item: {:?}", item);
39        Ok(Some(item))
40    }
41}