Skip to main content

stygian_graph/
lib.rs

1//! # Stygian Graph
2#![allow(clippy::multiple_crate_versions)]
3//!
4//! A high-performance, graph-based web scraping engine for Rust.
5//!
6//! ## Overview
7//!
8//! Stygian treats scraping pipelines as Directed Acyclic Graphs (DAGs) where each node
9//! is a pluggable service module (HTTP fetchers, AI extractors, headless browsers).
10//! Built for extreme concurrency and extensibility using hexagonal architecture.
11//!
12//! ## Quick Start
13//!
14//! ```no_run
15//! use stygian_graph::domain::graph::Pipeline;
16//! use stygian_graph::domain::pipeline::PipelineUnvalidated;
17//!
18//! #[tokio::main]
19//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
20//!     // Create a simple scraping pipeline
21//!     let config = serde_json::json!({
22//!         "nodes": [],
23//!         "edges": []
24//!     });
25//!     
26//!     let pipeline = PipelineUnvalidated::new(config)
27//!         .validate()?
28//!         .execute()
29//!         .complete(serde_json::json!({"status": "success"}));
30//!     
31//!     println!("Pipeline complete: {:?}", pipeline.results());
32//!     Ok(())
33//! }
34//! ```
35//!
36//! ## Architecture
37//!
38//! Stygian follows hexagonal (ports & adapters) architecture:
39//!
40//! - **Domain**: Core business logic (graph execution, pipeline orchestration)
41//! - **Ports**: Trait definitions (service interfaces, abstractions)
42//! - **Adapters**: Implementations (HTTP, AI providers, storage, caching)
43//! - **Application**: Orchestration (service registry, executor, CLI)
44//!
45//! ## Features
46//!
47//! - 🕸️ **Graph-based execution**: DAG pipelines with petgraph
48//! - 🤖 **Multi-AI support**: Claude, GPT, Gemini, Copilot, Ollama
49//! - 🌐 **JavaScript rendering**: Optional browser automation via `stygian-browser`
50//! - 📊 **Multi-modal extraction**: HTML, PDF, images, video, audio
51//! - 🛡️ **Anti-bot handling**: User-Agent rotation, proxy support, rate limiting
52//! - 🚀 **High concurrency**: Worker pools, backpressure, Tokio + Rayon
53//! - 🔄 **Idempotent operations**: Safe retries with idempotency keys
54//! - 📈 **Observability**: Metrics, tracing, monitoring
55//!
56//! ## Crate Features
57//!
58//! - `browser` (default): Include stygian-browser for JavaScript rendering
59//! - `full`: All features enabled
60
61#![warn(missing_docs, rustdoc::broken_intra_doc_links)]
62#![deny(unsafe_code)]
63#![cfg_attr(docsrs, feature(doc_cfg))]
64
65// ═══════════════════════════════════════════════════════════════════════════
66// Internal Module Organization (Hexagonal Architecture)
67// ═══════════════════════════════════════════════════════════════════════════
68
69/// Core domain logic - graph execution, pipelines, orchestration
70///
71/// **Hexagonal principle**: Domain never imports adapters, only ports (traits).
72pub mod domain;
73
74/// Port trait definitions - service abstractions
75///
76/// Defines interfaces that adapters must implement:
77/// - `ScrapingService`: HTTP fetchers, browser automation
78/// - `AIProvider`: LLM extraction services
79/// - `CachePort`: Caching abstractions
80/// - `CircuitBreaker`: Resilience patterns
81pub mod ports;
82
83/// Adapter implementations - infrastructure concerns
84///
85/// Concrete implementations of port traits:
86/// - HTTP client with anti-bot features
87/// - AI providers (Claude, GPT, Gemini, Ollama)
88/// - Storage backends (file, S3, database)
89/// - Cache backends (memory, Redis, file)
90pub mod adapters;
91
92/// Application layer - orchestration and coordination
93///
94/// High-level coordination logic:
95/// - Service registry with dependency injection
96/// - Pipeline executor
97/// - CLI interface
98/// - Configuration management
99pub mod application;
100
101// ═══════════════════════════════════════════════════════════════════════════
102// Public API
103// ═══════════════════════════════════════════════════════════════════════════
104
105/// Error types used throughout the crate (glob re-export of `crate::domain::error`)
106pub mod error {
107    pub use crate::domain::error::*;
108}
109
110/// Convenience glob re-exports of the items in `domain::pipeline`, `error`, and `ports`
111///
112/// # Example
113///
114/// ```
115/// use stygian_graph::prelude::*;
116/// ```
117pub mod prelude {
118    pub use crate::domain::pipeline::*;
119    pub use crate::error::*;
120    pub use crate::ports::*;
121}
122
123// Re-export browser crate if feature is enabled
124#[cfg(feature = "browser")]
125#[cfg_attr(docsrs, doc(cfg(feature = "browser")))]
126pub use stygian_browser;
127
128#[cfg(test)]
129mod tests {
130    #[test]
131    fn it_works() { // NOTE(review): placeholder — only checks integer arithmetic, exercises no crate item
132        assert_eq!(2 + 2, 4);
133    }
134}