spider_pipeline/lib.rs
1//! # spider-pipeline
2//!
3//! Built-in pipeline implementations for the `spider-lib` framework.
4//!
5//! These pipelines process, filter, transform, and store scraped data.
6//!
7//! ## Example
8//!
9//! ```rust,ignore
10//! use spider_pipeline::json::JsonPipeline;
11//! use spider_pipeline::console::ConsolePipeline;
12//!
13//! let crawler = CrawlerBuilder::new(MySpider)
14//!     .add_pipeline(JsonPipeline::new("output.json")?)
15//!     .add_pipeline(ConsolePipeline::new())
16//!     .build()
17//!     .await?;
18//! ```
19
20// Core pipelines (always available)
// None of the five modules in this group carries a `#[cfg]` attribute, so they are
// compiled regardless of which Cargo features are selected.
// The crate-level example imports `ConsolePipeline` from `console`.
// NOTE(review): the contents of `dedup`, `pipeline`, `transform` and `validation` are
// not visible from this file — presumably item de-duplication, the shared pipeline
// abstraction, item transformation and item validation; confirm in each module.
21pub mod console;
22pub mod dedup;
23pub mod pipeline;
24pub mod transform;
25pub mod validation;
26
27// Optional pipelines (feature-gated)
// Each module below is compiled only when the Cargo feature named in its `#[cfg]`
// attribute is enabled; without that feature the module path does not exist.
// NOTE(review): the output format of each module is inferred from its name only —
// confirm against the module itself.

// Gated on feature `pipeline-csv`.
28#[cfg(feature = "pipeline-csv")]
29pub mod csv;
30
// Gated on feature `pipeline-json`. The crate-level example imports `JsonPipeline`
// from this module, so that example depends on this feature being enabled.
31#[cfg(feature = "pipeline-json")]
32pub mod json;
33
// Gated on feature `pipeline-jsonl`.
34#[cfg(feature = "pipeline-jsonl")]
35pub mod jsonl;
36
// Gated on feature `pipeline-sqlite`.
37#[cfg(feature = "pipeline-sqlite")]
38pub mod sqlite;
39
// Gated on feature `pipeline-stream-json`.
40#[cfg(feature = "pipeline-stream-json")]
41pub mod stream_json;