stygian_plugin/lib.rs
1//! stygian-plugin: Chrome browser plugin fallback scraper
2//!
3//! Provides a flexible, interactive visual data extraction framework as a fallback
4//! when stygian-graph and stygian-browser cannot scrape a page.
5//!
6//! # Architecture
7//!
8//! Following hexagonal architecture with clear separation:
9//!
10//! ```text
11//! ┌─────────────────────────────────────┐
12//! │ Application / MCP Layer │
13//! │ (plugin_apply_template, etc.) │
14//! └──────────────┬──────────────────────┘
15//! │
16//! ┌──────────────▼──────────────────────┐
17//! │ Domain Layer (pure Rust) │
18//! │ ExtractionTemplate │
19//! │ ExtractionRequest/Result │
20//! │ Transformation Pipeline │
21//! └──────────────┬──────────────────────┘
22//! │
23//! ┌──────────────▼──────────────────────┐
24//! │ Ports (traits) │
25//! │ PluginTemplateStore │
26//! │ PluginExtractionPort │
27//! │ IdempotencyKeyStore │
28//! └──────────────┬──────────────────────┘
29//! │
30//! ┌──────────────▼──────────────────────┐
31//! │ Adapters (implementations) │
32//! │ FileTemplateStore │
33//! │ ExtractionEngine │
34//! │ MemoryIdempotencyStore │
35//! └─────────────────────────────────────┘
36//! ```
37//!
38//! # Features
39//!
40//! - **Template-based extraction**: Define schema once, apply to multiple elements
41//! - **Recording-based**: User clicks/highlights → learns pattern
42//! - **Query-driven**: Declarative extraction with CSS/XPath selectors
43//! - **Region-based**: Multiple independent zones, each with own rules
44//! - **Multi-instance**: Iterate template across matching elements
45//! - **Multi-set**: Extract different shapes from same page
46//! - **Cross-page**: Reuse templates in crawl sessions
47//! - **Idempotency**: Safe retries via ULID-based deduplication
48//! - **Transformations**: Regex, type coercion, HTML stripping, etc.
49//!
50//! # Quick Start
51//!
52//! ```no_run
53//! use stygian_plugin::domain::{ExtractionTemplate, Region, Selector, ExtractionRequest};
54//! use stygian_plugin::ports::PluginExtractionPort;
55//! use serde_json::json;
56//!
57//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
58//! // Create a template with regions
59//! let template = ExtractionTemplate::new("Product")
60//! .with_region(
61//! Region::new(
62//! "title",
63//! Selector::css(".product-title"),
64//! json!({"type": "string"}),
65//! )
66//! )
67//! .with_region(
68//! Region::new(
69//! "price",
70//! Selector::css(".product-price"),
71//! json!({"type": "number"}),
72//! )
73//! );
74//!
75//! // Create extraction request
76//! let request = ExtractionRequest::new(
77//! template,
78//! "https://example.com/products",
79//! "<html>...</html>"
80//! );
81//!
82//! // Execute (requires a PluginExtractionPort adapter)
83//! // let result = extraction_port.execute(&request).await?;
84//! # Ok(())
85//! # }
86//! ```
87
88#![allow(clippy::multiple_crate_versions)]
89
90// ═══════════════════════════════════════════════════════════════════════════
91// Module Organization
92// ═══════════════════════════════════════════════════════════════════════════
93
94/// Error types
95pub mod error;
96
97/// Domain layer: pure business logic and value objects
98///
99/// Contains zero external dependencies; all I/O happens in adapters.
100pub mod domain;
101
102/// Port trait definitions: interfaces adapters must implement
103///
104/// The domain depends only on these traits, not on concrete implementations.
105pub mod ports;
106
107/// Adapter implementations: concrete providers of port traits
108pub mod adapters;
109
110/// Storage adapters: template persistence, idempotency tracking
111pub mod storage;
112
113/// MCP (Model Context Protocol) server for the plugin system
114pub mod mcp;
115
116/// HTTP transport for the MCP server (requires `http` feature)
117///
118/// Exposes JSON-RPC 2.0 over HTTP with CORS support for browser extension use.
119#[cfg(feature = "http")]
120pub mod http;
121
122/// Runtime configuration for the standalone MCP server
123pub mod config;
124
125/// Extraction reliability scoring
126///
127/// Computes a 0.0–1.0 reliability score for [`domain::ExtractionResult`]
128/// outputs so fallback chains can optimize for *data quality*, not only
129/// fetch success. See the module-level docs for the score interpretation
130/// table and the selection policy.
131pub mod reliability;
132
133// ═══════════════════════════════════════════════════════════════════════════
134// Public API Re-exports
135// ═══════════════════════════════════════════════════════════════════════════
136
137pub use domain::{
138 ExtractionRequest, ExtractionResult, ExtractionTemplate, IdempotencyKey, Region, Selector,
139 Transformation,
140};
141pub use error::{PluginError, Result};
142pub use mcp::{McpPluginServer, McpRequestHandler};
143pub use ports::{IdempotencyKeyStore, PluginExtractionPort, PluginTemplateStore};
144pub use reliability::{
145 ReliabilityBand, ReliabilityScore, ReliabilityScorer, ScoreWeightedSelector, ScoredCandidate,
146 ScoringWeights,
147};