// Crate-root items re-exported from `spider_core`.
pub use spider_core::{
    CrawlShapePreset, Crawler, CrawlerBuilder, CrawlerConfig, DiscoveryConfig, DiscoveryMode,
    DiscoveryRule, Downloader, ReqwestClientDownloader, Spider, StartRequestIter, StartRequests,
};
// `async_trait` and `tokio` exactly as `spider_core` exposes them.
// NOTE(review): presumably surfaced so downstream code needs no direct dependency — confirm.
pub use spider_core::{async_trait, tokio};
// Items taken from the `scheduler`, `state`, and `stats` submodules of `spider_core`.
pub use spider_core::scheduler::Scheduler;
pub use spider_core::state::{
    ConcurrentMap, ConcurrentVec, Counter, Counter64, CrawlerState, Flag, StateAccessMetrics,
    VisitedUrls,
};
pub use spider_core::stats::StatCollector;
// Everything re-exported from `spider_util`, grouped by the submodule it lives in.
pub use spider_util::{
    error::{PipelineError, SpiderError},
    item::{
        FieldValueType, ItemFieldSchema, ItemSchema, ParseOutput, ScrapedItem, TypedItemSchema,
    },
    request::{Method, Request},
    response::{Link, LinkExtractOptions, LinkSource, LinkType, PageMetadata, Response},
    selector::{SelectorList, SelectorNode},
    util::{create_dir, is_same_site, normalize_origin, validate_output_dir},
};
// `scraped_item` from `spider_macro`, plus the items of the `middleware` and `pipeline`
// modules of their respective crates.
pub use spider_macro::scraped_item;
pub use spider_middleware::middleware::{Middleware, MiddlewareAction};
pub use spider_pipeline::pipeline::Pipeline;
pub use spider_middleware::{
rate_limit::RateLimitMiddleware, referer::RefererMiddleware, retry::RetryMiddleware,
};
#[cfg(feature = "middleware-cache")]
pub use spider_middleware::http_cache::HttpCacheMiddleware;
#[cfg(feature = "middleware-autothrottle")]
pub use spider_middleware::autothrottle::AutoThrottleMiddleware;
#[cfg(feature = "middleware-proxy")]
pub use spider_middleware::proxy::ProxyMiddleware;
#[cfg(feature = "middleware-user-agent")]
pub use spider_middleware::user_agent::UserAgentMiddleware;
#[cfg(feature = "middleware-robots")]
pub use spider_middleware::robots::RobotsTxtMiddleware;
#[cfg(feature = "middleware-cookies")]
pub use spider_middleware::cookies::CookieMiddleware;
pub use spider_pipeline::{
console::ConsolePipeline,
dedup::DeduplicationPipeline,
schema::{
SchemaExportConfig, SchemaTransformPipeline, SchemaValidationPipeline, SchemaViolation,
},
transform::{TransformOperation, TransformPipeline},
validation::{JsonType, ValidationPipeline, ValidationRule},
};
#[cfg(feature = "pipeline-csv")]
pub use spider_pipeline::csv::CsvPipeline;
#[cfg(feature = "pipeline-json")]
pub use spider_pipeline::json::JsonPipeline;
#[cfg(feature = "pipeline-jsonl")]
pub use spider_pipeline::jsonl::JsonlPipeline;
#[cfg(feature = "pipeline-sqlite")]
pub use spider_pipeline::sqlite::SqlitePipeline;
#[cfg(feature = "pipeline-stream-json")]
pub use spider_pipeline::stream_json::StreamJsonPipeline;
// Items from `spider_core::checkpoint`, re-exported only when the `checkpoint` feature is
// enabled.
#[cfg(feature = "checkpoint")]
pub use spider_core::checkpoint::Checkpoint;
#[cfg(feature = "checkpoint")]
pub use spider_core::checkpoint::SchedulerCheckpoint;