Skip to main content

spider_lib/pipelines/
console_writer.rs

//! Item Pipeline for writing scraped items to the console.
//!
//! This module provides the `ConsoleWriterPipeline`, a basic and useful
//! item pipeline for debugging and immediate inspection of scraped data.
//! When integrated into a crawler, this pipeline simply logs each received
//! `ScrapedItem` through `tracing` at `info` level (the console, or whatever
//! tracing output is configured).
//!
//! It serves as a straightforward way to verify that spiders are extracting
//! data correctly and that items are flowing through the pipeline as expected.
10use crate::{item::ScrapedItem, pipeline::Pipeline, PipelineError};
11use async_trait::async_trait;
12use tracing::info;
13
/// An item pipeline that logs every scraped item it receives.
///
/// This is a field-less unit struct, so it carries no state and is free to
/// copy. `Debug` is derived so the pipeline can be printed in diagnostics and
/// embedded in other `#[derive(Debug)]` types; `Clone`/`Copy` are derived
/// because duplicating a zero-sized value is trivially cheap.
#[derive(Debug, Clone, Copy)]
pub struct ConsoleWriterPipeline;
16
17impl ConsoleWriterPipeline {
18    /// Creates a new `ConsoleWriterPipeline`.
19    pub fn new() -> Self {
20        Self
21    }
22}
23
24impl Default for ConsoleWriterPipeline {
25    fn default() -> Self {
26        Self::new()
27    }
28}
29
30#[async_trait]
31impl<I: ScrapedItem> Pipeline<I> for ConsoleWriterPipeline {
32    fn name(&self) -> &str {
33        "ConsoleWriterPipeline"
34    }
35
36    async fn process_item(&self, item: I) -> Result<Option<I>, PipelineError> {
37        info!("Pipeline processing item: {:?}", item);
38        Ok(Some(item))
39    }
40}