Skip to main content

stygian_plugin/
lib.rs

1//! stygian-plugin: Chrome browser plugin fallback scraper
2//!
3//! Provides a flexible, interactive visual data extraction framework as a fallback
4//! when stygian-graph and stygian-browser cannot scrape a page.
5//!
6//! # Architecture
7//!
8//! Following hexagonal architecture with clear separation:
9//!
10//! ```text
11//! ┌─────────────────────────────────────┐
12//! │  Application / MCP Layer            │
13//! │  (plugin_apply_template, etc.)      │
14//! └──────────────┬──────────────────────┘
15//!                │
16//! ┌──────────────▼──────────────────────┐
17//! │  Domain Layer (pure Rust)           │
18//! │  ExtractionTemplate                 │
19//! │  ExtractionRequest/Result           │
20//! │  Transformation Pipeline            │
21//! └──────────────┬──────────────────────┘
22//!                │
23//! ┌──────────────▼──────────────────────┐
24//! │  Ports (traits)                     │
25//! │  PluginTemplateStore                │
26//! │  PluginExtractionPort               │
27//! │  IdempotencyKeyStore                │
28//! └──────────────┬──────────────────────┘
29//!                │
30//! ┌──────────────▼──────────────────────┐
31//! │  Adapters (implementations)         │
32//! │  FileTemplateStore                  │
33//! │  ExtractionEngine                   │
34//! │  MemoryIdempotencyStore             │
35//! └─────────────────────────────────────┘
36//! ```
37//!
38//! # Features
39//!
40//! - **Template-based extraction**: Define schema once, apply to multiple elements
41//! - **Recording-based**: User clicks/highlights → learns pattern
42//! - **Query-driven**: Declarative extraction with CSS/XPath selectors
43//! - **Region-based**: Multiple independent zones, each with own rules
44//! - **Multi-instance**: Iterate template across matching elements
45//! - **Multi-set**: Extract different shapes from same page
46//! - **Cross-page**: Reuse templates in crawl sessions
47//! - **Idempotency**: Safe retries via ULID-based deduplication
48//! - **Transformations**: Regex, type coercion, HTML stripping, etc.
49//!
50//! # Quick Start
51//!
52//! ```no_run
53//! use stygian_plugin::domain::{ExtractionTemplate, Region, Selector, ExtractionRequest};
54//! use stygian_plugin::ports::PluginExtractionPort;
55//! use serde_json::json;
56//!
57//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
58//! // Create a template with regions
59//! let template = ExtractionTemplate::new("Product")
60//!     .with_region(
61//!         Region::new(
62//!             "title",
63//!             Selector::css(".product-title"),
64//!             json!({"type": "string"}),
65//!         )
66//!     )
67//!     .with_region(
68//!         Region::new(
69//!             "price",
70//!             Selector::css(".product-price"),
71//!             json!({"type": "number"}),
72//!         )
73//!     );
74//!
75//! // Create extraction request
76//! let request = ExtractionRequest::new(
77//!     template,
78//!     "https://example.com/products",
79//!     "<html>...</html>"
80//! );
81//!
82//! // Execute (requires a PluginExtractionPort adapter)
83//! // let result = extraction_port.execute(&request).await?;
84//! # Ok(())
85//! # }
86//! ```
87
// Crate-wide suppression of `clippy::multiple_crate_versions`, the lint that
// reports when more than one version of the same crate is in the dependency graph.
// NOTE(review): no rationale is recorded here; presumably the duplicates come
// from transitive dependencies this crate cannot unify — confirm and document.
88#![allow(clippy::multiple_crate_versions)]
89
90// ═══════════════════════════════════════════════════════════════════════════
91// Module Organization
92// ═══════════════════════════════════════════════════════════════════════════
93
94/// Error types: `PluginError` and the crate's `Result` type (both re-exported at the crate root).
95pub mod error;
96
97/// Domain layer: pure business logic and value objects (`ExtractionTemplate`, `ExtractionRequest`, `ExtractionResult`, `Region`, `Selector`, `Transformation`, `IdempotencyKey`).
98///
99/// Contains zero external dependencies; all I/O happens in adapters.
// NOTE(review): the crate-level Quick Start passes `serde_json::json!` values to
// `Region::new`, so "zero external dependencies" presumably means "no I/O
// dependencies" rather than "no third-party crates" — confirm and reword.
100pub mod domain;
101
102/// Port trait definitions: interfaces adapters must implement (`PluginTemplateStore`, `PluginExtractionPort`, `IdempotencyKeyStore`).
103///
104/// The domain depends only on these traits, not on concrete implementations.
105pub mod ports;
106
107/// Adapter implementations: concrete providers of the port traits declared in `ports`.
108pub mod adapters;
109
110/// Storage adapters: template persistence and idempotency tracking.
111pub mod storage;
112
113/// MCP (Model Context Protocol) server for the plugin system; provides `McpPluginServer` and `McpRequestHandler`.
114pub mod mcp;
115
116/// HTTP transport for the MCP server (only compiled when the `http` Cargo feature is enabled).
117///
118/// Exposes JSON-RPC 2.0 over HTTP with CORS support for browser extension use.
119#[cfg(feature = "http")]
120pub mod http;
121
122/// Runtime configuration for the standalone MCP server.
123pub mod config;
124
125// ═══════════════════════════════════════════════════════════════════════════
126// Public API Re-exports
127// ═══════════════════════════════════════════════════════════════════════════
128
// Flatten the most commonly used items to the crate root so callers can write
// `stygian_plugin::ExtractionTemplate` instead of spelling out the module path.

// Domain value objects used to describe and run an extraction.
129pub use domain::{
130    ExtractionRequest, ExtractionResult, ExtractionTemplate, IdempotencyKey, Region, Selector,
131    Transformation,
132};
// Error type plus the `Result` defined in `error`. Importing this `Result`
// unqualified shadows the prelude `Result` in the importing scope.
133pub use error::{PluginError, Result};
// MCP server entry points.
134pub use mcp::{McpPluginServer, McpRequestHandler};
// Port traits that adapters implement.
135pub use ports::{IdempotencyKeyStore, PluginExtractionPort, PluginTemplateStore};