stygian_graph/lib.rs
1//! # Stygian Graph
2#![allow(clippy::multiple_crate_versions)] // silences Clippy's duplicate-dependency-version lint crate-wide; NOTE(review): reason not recorded here — presumably transitive dependencies disagree on versions, confirm
3//!
4//! A high-performance, graph-based web scraping engine for Rust.
5//!
6//! ## Overview
7//!
8//! Stygian treats scraping pipelines as Directed Acyclic Graphs (DAGs) where each node
9//! is a pluggable service module (HTTP fetchers, AI extractors, headless browsers).
10//! Built for extreme concurrency and extensibility using hexagonal architecture.
11//!
12//! ## Quick Start
13//!
14//! ```no_run
15//! use stygian_graph::domain::graph::Pipeline;
16//! use stygian_graph::domain::pipeline::PipelineUnvalidated;
17//!
18//! #[tokio::main]
19//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
20//! // Create a simple scraping pipeline
21//! let config = serde_json::json!({
22//! "nodes": [],
23//! "edges": []
24//! });
25//!
26//! let pipeline = PipelineUnvalidated::new(config)
27//! .validate()?
28//! .execute()
29//! .complete(serde_json::json!({"status": "success"}));
30//!
31//! println!("Pipeline complete: {:?}", pipeline.results());
32//! Ok(())
33//! }
34//! ```
35//!
36//! ## Architecture
37//!
38//! Stygian follows hexagonal (ports & adapters) architecture:
39//!
40//! - **Domain**: Core business logic (graph execution, pipeline orchestration)
41//! - **Ports**: Trait definitions (service interfaces, abstractions)
42//! - **Adapters**: Implementations (HTTP, AI providers, storage, caching)
43//! - **Application**: Orchestration (service registry, executor, CLI)
44//!
45//! ## Features
46//!
47//! - 🕸️ **Graph-based execution**: DAG pipelines with petgraph
48//! - 🤖 **Multi-AI support**: Claude, GPT, Gemini, Copilot, Ollama
49//! - 🌐 **JavaScript rendering**: Optional browser automation via `stygian-browser`
50//! - 📄 **Multi-modal extraction**: HTML, PDF, images, video, audio
51//! - 🛡️ **Anti-bot handling**: User-Agent rotation, proxy support, rate limiting
52//! - 🚀 **High concurrency**: Worker pools, backpressure, Tokio + Rayon
53//! - 🔄 **Idempotent operations**: Safe retries with idempotency keys
54//! - 📊 **Observability**: Metrics, tracing, monitoring
55//!
56//! ## Crate Features
57//!
58//! - `browser` (default): Include stygian-browser for JavaScript rendering
59//! - `full`: All features enabled
60
61#![warn(missing_docs, rustdoc::broken_intra_doc_links)] // warn on undocumented public items and on doc links that fail to resolve
62#![deny(unsafe_code)] // reject `unsafe` unless an inner scope explicitly re-allows the lint
63#![cfg_attr(docsrs, feature(doc_cfg))] // turn on `doc_cfg` only under `--cfg docsrs`; needed by the `doc(cfg(...))` attribute on the browser re-export
64
65// ───────────────────────────────────────────────────────────────────────────
66// Internal Module Organization (Hexagonal Architecture)
67// ───────────────────────────────────────────────────────────────────────────
68
69/// Core domain logic: graph execution, pipelines, and orchestration.
70///
71/// **Hexagonal principle**: the domain never imports adapters, only ports (traits).
72pub mod domain; // `domain::error` and `domain::pipeline` are re-exported at the crate root via `error` and `prelude`
73
74/// Port trait definitions: the service abstractions the domain is allowed to depend on.
75///
76/// Defines the interfaces that adapters must implement:
77/// - `ScrapingService`: HTTP fetchers, browser automation
78/// - `AIProvider`: LLM extraction services
79/// - `CachePort`: Caching abstractions
80/// - `CircuitBreaker`: Resilience patterns
81pub mod ports; // glob re-exported through `prelude`
82
83/// Adapter implementations: infrastructure concerns.
84///
85/// Concrete implementations of the port traits:
86/// - HTTP client with anti-bot features
87/// - AI providers (Claude, GPT, Gemini, Ollama)
88/// - Storage backends (file, S3, database)
89/// - Cache backends (memory, Redis, file)
90pub mod adapters; // NOTE(review): the crate-level feature list also names Copilot as an AI provider — confirm which list is current
91
92/// Application layer: orchestration and coordination.
93///
94/// High-level coordination logic:
95/// - Service registry with dependency injection
96/// - Pipeline executor
97/// - CLI interface
98/// - Configuration management
99pub mod application; // not included in `prelude`; import from `stygian_graph::application` directly
100
101// ───────────────────────────────────────────────────────────────────────────
102// Public API
103// ───────────────────────────────────────────────────────────────────────────
104
105/// Error types used throughout the crate: a glob re-export of `crate::domain::error` under the shorter `stygian_graph::error` path.
106pub mod error {
107 pub use crate::domain::error::*;
108}
109
110/// Convenience re-exports: a single glob import brings in the public items of `domain::pipeline`, `error`, and `ports`.
111///
112/// # Example
113///
114/// ```
115/// use stygian_graph::prelude::*;
116/// ```
117pub mod prelude {
118 pub use crate::domain::pipeline::*;
119 pub use crate::error::*; // `error` is itself a glob re-export of `domain::error`
120 pub use crate::ports::*;
121}
122
123// Re-export the `stygian_browser` crate; compiled in only when the `browser` feature is enabled.
124#[cfg(feature = "browser")]
125#[cfg_attr(docsrs, doc(cfg(feature = "browser")))] // under `--cfg docsrs`, rustdoc labels the item as requiring `browser`
126pub use stygian_browser;
127
128#[cfg(test)]
129mod tests {
130 #[test]
131 fn it_works() {
132 assert_eq!(2 + 2, 4); // placeholder: touches no crate code, only proves the test harness runs
133 }
134}