stygian_graph/ports.rs
1//! Port trait definitions - service abstractions
2//!
3//! Defines interfaces that adapters must implement.
4//! Following hexagonal architecture, these are the "ports" that connect
5//! domain logic to external infrastructure.
6
7use crate::domain::error::Result;
8use async_trait::async_trait;
9use serde_json::Value;
10
11/// Input to a scraping service
12///
13/// Contains the target URL and service-specific parameters that configure
14/// how the scraping operation should be performed.
15///
16/// # Example
17///
18/// ```
19/// use stygian_graph::ports::ServiceInput;
20/// use serde_json::json;
21///
22/// let input = ServiceInput {
23/// url: "https://example.com".to_string(),
24/// params: json!({
25/// "timeout_ms": 5000,
26/// "user_agent": "stygian/1.0"
27/// }),
28/// };
29/// ```
30#[derive(Debug, Clone)]
31pub struct ServiceInput {
32 /// Target URL to scrape
33 pub url: String,
34 /// Service-specific parameters (timeout, headers, etc.)
35 pub params: Value,
36}
37
38/// Output from a scraping service
39///
40/// Contains the raw scraped data and metadata about the operation
41/// for downstream processing and debugging.
42///
43/// # Example
44///
45/// ```
46/// use stygian_graph::ports::ServiceOutput;
47/// use serde_json::json;
48///
49/// let output = ServiceOutput {
50/// data: "<html>...</html>".to_string(),
51/// metadata: json!({
52/// "status_code": 200,
53/// "content_type": "text/html",
54/// "response_time_ms": 145
55/// }),
56/// };
57/// ```
58#[derive(Debug, Clone)]
59pub struct ServiceOutput {
60 /// Raw scraped data (HTML, JSON, binary, etc.)
61 pub data: String,
62 /// Metadata about the operation (status, timing, headers)
63 pub metadata: Value,
64}
65
66/// Primary port: `ScrapingService` trait
67///
68/// All scraping modules (HTTP, browser, JavaScript rendering) implement this trait.
69/// Uses `async_trait` for dyn compatibility with service registry.
70///
71/// # Example Implementation
72///
73/// ```no_run
74/// use stygian_graph::ports::{ScrapingService, ServiceInput, ServiceOutput};
75/// use stygian_graph::error::Result;
76/// use async_trait::async_trait;
77/// use serde_json::json;
78///
79/// struct MyService;
80///
81/// #[async_trait]
82/// impl ScrapingService for MyService {
83/// async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
84/// Ok(ServiceOutput {
85/// data: format!("Scraped: {}", input.url),
86/// metadata: json!({"status": "ok"}),
87/// })
88/// }
89///
90/// fn name(&self) -> &'static str {
91/// "my-service"
92/// }
93/// }
94/// ```
95#[async_trait]
96pub trait ScrapingService: Send + Sync {
97 /// Execute the scraping operation
98 ///
99 /// # Arguments
100 ///
101 /// * `input` - Service input containing URL and parameters
102 ///
103 /// # Returns
104 ///
105 /// * `Ok(ServiceOutput)` - Successful scraping result
106 /// * `Err(StygianError)` - Service error, timeout, or network failure
107 ///
108 /// # Example
109 ///
110 /// ```no_run
111 /// # use stygian_graph::ports::{ScrapingService, ServiceInput};
112 /// # use serde_json::json;
113 /// # async fn example(service: impl ScrapingService) {
114 /// let input = ServiceInput {
115 /// url: "https://example.com".to_string(),
116 /// params: json!({}),
117 /// };
118 /// let output = service.execute(input).await.unwrap();
119 /// println!("Data: {}", output.data);
120 /// # }
121 /// ```
122 async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput>;
123
124 /// Service name for identification in logs and metrics
125 ///
126 /// # Example
127 ///
128 /// ```no_run
129 /// # use stygian_graph::ports::ScrapingService;
130 /// # fn example(service: impl ScrapingService) {
131 /// println!("Using service: {}", service.name());
132 /// # }
133 /// ```
134 fn name(&self) -> &'static str;
135}
136
/// Capability flags for an AI provider.
///
/// Each field is an independent yes/no switch stating whether the provider
/// supports that feature. The derived `Default` leaves every flag `false`.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::ProviderCapabilities;
///
/// let caps = ProviderCapabilities {
///     streaming: true,
///     vision: false,
///     tool_use: true,
///     json_mode: true,
/// };
/// assert!(caps.streaming, "Provider supports streaming");
/// ```
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[allow(clippy::struct_excessive_bools)] // 4 capability flags are clearer as named bools than a u8 bitmask
pub struct ProviderCapabilities {
    /// Provider can stream responses.
    pub streaming: bool,
    /// Provider can analyze images/video.
    pub vision: bool,
    /// Provider supports function calling / tool use.
    pub tool_use: bool,
    /// Provider has a native JSON output mode.
    pub json_mode: bool,
}
166
167/// AI Provider port for LLM-based extraction
168///
169/// All LLM providers (Claude, GPT, Gemini, GitHub Copilot, Ollama) implement this trait.
170/// Uses `async_trait` for dyn compatibility with service registry.
171///
172/// # Example Implementation
173///
174/// ```no_run
175/// use stygian_graph::ports::{AIProvider, ProviderCapabilities};
176/// use stygian_graph::domain::error::Result;
177/// use async_trait::async_trait;
178/// use serde_json::{json, Value};
179/// use futures::stream::{Stream, BoxStream};
180///
181/// struct MyProvider;
182///
183/// #[async_trait]
184/// impl AIProvider for MyProvider {
185/// async fn extract(&self, content: String, schema: Value) -> Result<Value> {
186/// Ok(json!({"extracted": "data"}))
187/// }
188///
189/// async fn stream_extract(
190/// &self,
191/// content: String,
192/// schema: Value,
193/// ) -> Result<BoxStream<'static, Result<Value>>> {
194/// unimplemented!("Streaming not supported")
195/// }
196///
197/// fn capabilities(&self) -> ProviderCapabilities {
198/// ProviderCapabilities {
199/// streaming: false,
200/// vision: false,
201/// tool_use: true,
202/// json_mode: true,
203/// }
204/// }
205///
206/// fn name(&self) -> &'static str {
207/// "my-provider"
208/// }
209/// }
210/// ```
211#[async_trait]
212pub trait AIProvider: Send + Sync {
213 /// Extract structured data from content using LLM
214 ///
215 /// # Arguments
216 ///
217 /// * `content` - Raw content to analyze (text, HTML, etc.)
218 /// * `schema` - JSON schema defining expected output structure
219 ///
220 /// # Returns
221 ///
222 /// * `Ok(Value)` - Extracted data matching the schema
223 /// * `Err(ProviderError)` - API error, token limit, or policy violation
224 ///
225 /// # Example
226 ///
227 /// ```no_run
228 /// # use stygian_graph::ports::AIProvider;
229 /// # use serde_json::json;
230 /// # async fn example(provider: impl AIProvider) {
231 /// let schema = json!({
232 /// "type": "object",
233 /// "properties": {
234 /// "title": {"type": "string"},
235 /// "price": {"type": "number"}
236 /// }
237 /// });
238 /// let result = provider.extract("<html>...</html>".to_string(), schema).await.unwrap();
239 /// println!("Extracted: {}", result);
240 /// # }
241 /// ```
242 async fn extract(&self, content: String, schema: Value) -> Result<Value>;
243
244 /// Stream structured data extraction for real-time processing
245 ///
246 /// Returns a stream of partial results as they arrive from the LLM.
247 /// Only supported by providers with `capabilities().streaming == true`.
248 ///
249 /// # Arguments
250 ///
251 /// * `content` - Raw content to analyze
252 /// * `schema` - JSON schema defining expected output structure
253 ///
254 /// # Returns
255 ///
256 /// * `Ok(BoxStream)` - Stream of partial extraction results
257 /// * `Err(ProviderError)` - If streaming is not supported or API error
258 ///
259 /// # Example
260 ///
261 /// ```no_run
262 /// # use stygian_graph::ports::AIProvider;
263 /// # use serde_json::json;
264 /// # use futures::StreamExt;
265 /// # async fn example(provider: impl AIProvider) {
266 /// let schema = json!({"type": "object"});
267 /// let mut stream = provider.stream_extract("content".to_string(), schema).await.unwrap();
268 /// while let Some(result) = stream.next().await {
269 /// match result {
270 /// Ok(partial) => println!("Chunk: {}", partial),
271 /// Err(e) => eprintln!("Stream error: {}", e),
272 /// }
273 /// }
274 /// # }
275 /// ```
276 async fn stream_extract(
277 &self,
278 content: String,
279 schema: Value,
280 ) -> Result<futures::stream::BoxStream<'static, Result<Value>>>;
281
282 /// Get provider capabilities
283 ///
284 /// Returns a struct describing what features this provider supports.
285 ///
286 /// # Example
287 ///
288 /// ```no_run
289 /// # use stygian_graph::ports::AIProvider;
290 /// # fn example(provider: impl AIProvider) {
291 /// let caps = provider.capabilities();
292 /// if caps.streaming {
293 /// println!("Provider supports streaming");
294 /// }
295 /// if caps.vision {
296 /// println!("Provider supports image analysis");
297 /// }
298 /// # }
299 /// ```
300 fn capabilities(&self) -> ProviderCapabilities;
301
302 /// Provider name (claude, gpt, gemini, etc.)
303 ///
304 /// # Example
305 ///
306 /// ```no_run
307 /// # use stygian_graph::ports::AIProvider;
308 /// # fn example(provider: impl AIProvider) {
309 /// println!("Using provider: {}", provider.name());
310 /// # }
311 /// ```
312 fn name(&self) -> &'static str;
313}
314
315/// Cache port for storing/retrieving data with idempotency key support
316///
317/// Provides a key-value store interface for caching scraped content,
318/// API responses, and idempotency tracking.
319///
320/// # Example Implementation
321///
322/// ```no_run
323/// use stygian_graph::ports::CachePort;
324/// use stygian_graph::domain::error::Result;
325/// use async_trait::async_trait;
326/// use std::time::Duration;
327///
328/// struct MyCache;
329///
330/// #[async_trait]
331/// impl CachePort for MyCache {
332/// async fn get(&self, key: &str) -> Result<Option<String>> {
333/// // Fetch from cache backend
334/// Ok(Some("cached_value".to_string()))
335/// }
336///
337/// async fn set(&self, key: &str, value: String, ttl: Option<Duration>) -> Result<()> {
338/// // Store in cache backend
339/// Ok(())
340/// }
341///
342/// async fn invalidate(&self, key: &str) -> Result<()> {
343/// // Remove from cache
344/// Ok(())
345/// }
346///
347/// async fn exists(&self, key: &str) -> Result<bool> {
348/// // Check existence
349/// Ok(true)
350/// }
351/// }
352/// ```
353#[async_trait]
354pub trait CachePort: Send + Sync {
355 /// Get value from cache
356 ///
357 /// # Arguments
358 ///
359 /// * `key` - Cache key (URL, idempotency key, etc.)
360 ///
361 /// # Returns
362 ///
363 /// * `Ok(Some(String))` - Cache hit
364 /// * `Ok(None)` - Cache miss
365 /// * `Err(CacheError)` - Backend failure
366 ///
367 /// # Example
368 ///
369 /// ```no_run
370 /// # use stygian_graph::ports::CachePort;
371 /// # async fn example(cache: impl CachePort) {
372 /// match cache.get("page:123").await {
373 /// Ok(Some(content)) => println!("Cache hit: {}", content),
374 /// Ok(None) => println!("Cache miss"),
375 /// Err(e) => eprintln!("Cache error: {}", e),
376 /// }
377 /// # }
378 /// ```
379 async fn get(&self, key: &str) -> Result<Option<String>>;
380
381 /// Set value in cache with optional TTL
382 ///
383 /// # Arguments
384 ///
385 /// * `key` - Cache key
386 /// * `value` - Value to store (JSON, HTML, etc.)
387 /// * `ttl` - Optional expiration duration
388 ///
389 /// # Returns
390 ///
391 /// * `Ok(())` - Successfully stored
392 /// * `Err(CacheError)` - Backend failure
393 ///
394 /// # Example
395 ///
396 /// ```no_run
397 /// # use stygian_graph::ports::CachePort;
398 /// # use std::time::Duration;
399 /// # async fn example(cache: impl CachePort) {
400 /// cache.set(
401 /// "page:123",
402 /// "<html>...</html>".to_string(),
403 /// Some(Duration::from_secs(3600))
404 /// ).await.unwrap();
405 /// # }
406 /// ```
407 async fn set(&self, key: &str, value: String, ttl: Option<std::time::Duration>) -> Result<()>;
408
409 /// Invalidate cache entry
410 ///
411 /// # Arguments
412 ///
413 /// * `key` - Cache key to invalidate
414 ///
415 /// # Returns
416 ///
417 /// * `Ok(())` - Successfully invalidated (or key didn't exist)
418 /// * `Err(CacheError)` - Backend failure
419 ///
420 /// # Example
421 ///
422 /// ```no_run
423 /// # use stygian_graph::ports::CachePort;
424 /// # async fn example(cache: impl CachePort) {
425 /// cache.invalidate("page:123").await.unwrap();
426 /// # }
427 /// ```
428 async fn invalidate(&self, key: &str) -> Result<()>;
429
430 /// Check if key exists in cache
431 ///
432 /// # Arguments
433 ///
434 /// * `key` - Cache key to check
435 ///
436 /// # Returns
437 ///
438 /// * `Ok(true)` - Key exists
439 /// * `Ok(false)` - Key does not exist or expired
440 /// * `Err(CacheError)` - Backend failure
441 ///
442 /// # Example
443 ///
444 /// ```no_run
445 /// # use stygian_graph::ports::CachePort;
446 /// # async fn example(cache: impl CachePort) {
447 /// if cache.exists("page:123").await.unwrap() {
448 /// println!("Page is cached");
449 /// }
450 /// # }
451 /// ```
452 async fn exists(&self, key: &str) -> Result<bool>;
453}
454
/// State of a circuit breaker.
///
/// The three variants are the states of the standard circuit breaker
/// pattern.
///
/// # State Transitions
///
/// ```text
/// Closed ---(too many failures)---> Open
/// Open -----(timeout elapsed)-----> HalfOpen
/// HalfOpen --(success)-----------> Closed
/// HalfOpen --(failure)-----------> Open
/// ```
///
/// # Example
///
/// ```
/// use stygian_graph::ports::CircuitState;
///
/// let state = CircuitState::Closed;
/// assert!(matches!(state, CircuitState::Closed));
/// ```
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CircuitState {
    /// Normal operation: requests pass through.
    Closed,
    /// Tripped: requests fail fast.
    Open,
    /// Probing for recovery: limited requests are allowed.
    HalfOpen,
}
486
487/// Circuit breaker port for resilience
488///
489/// Implements the circuit breaker pattern to prevent cascading failures
490/// when external services are unavailable. Uses interior mutability
491/// for state management.
492///
493/// # Example Implementation
494///
495/// ```no_run
496/// use stygian_graph::ports::{CircuitBreaker, CircuitState};
497/// use stygian_graph::domain::error::Result;
498/// use parking_lot::RwLock;
499/// use std::sync::Arc;
500///
501/// struct MyCircuitBreaker {
502/// state: Arc<RwLock<CircuitState>>,
503/// }
504///
505/// impl CircuitBreaker for MyCircuitBreaker {
506/// fn state(&self) -> CircuitState {
507/// *self.state.read()
508/// }
509///
510/// fn record_success(&self) {
511/// let mut state = self.state.write();
512/// *state = CircuitState::Closed;
513/// }
514///
515/// fn record_failure(&self) {
516/// let mut state = self.state.write();
517/// *state = CircuitState::Open;
518/// }
519///
520/// fn attempt_reset(&self) -> bool {
521/// let mut state = self.state.write();
522/// if matches!(*state, CircuitState::Open) {
523/// *state = CircuitState::HalfOpen;
524/// true
525/// } else {
526/// false
527/// }
528/// }
529/// }
530/// ```
531pub trait CircuitBreaker: Send + Sync {
532 /// Get current circuit breaker state
533 ///
534 /// # Returns
535 ///
536 /// Current state (`Closed`, `Open`, or `HalfOpen`)
537 ///
538 /// # Example
539 ///
540 /// ```no_run
541 /// # use stygian_graph::ports::{CircuitBreaker, CircuitState};
542 /// # fn example(cb: impl CircuitBreaker) {
543 /// match cb.state() {
544 /// CircuitState::Closed => println!("Normal operation"),
545 /// CircuitState::Open => println!("Circuit is open, failing fast"),
546 /// CircuitState::HalfOpen => println!("Testing recovery"),
547 /// }
548 /// # }
549 /// ```
550 fn state(&self) -> CircuitState;
551
552 /// Record successful operation
553 ///
554 /// Transitions `HalfOpen` -> `Closed`, maintains `Closed` state.
555 ///
556 /// # Example
557 ///
558 /// ```no_run
559 /// # use stygian_graph::ports::CircuitBreaker;
560 /// # fn example(cb: impl CircuitBreaker) {
561 /// // After successful API call
562 /// cb.record_success();
563 /// # }
564 /// ```
565 fn record_success(&self);
566
567 /// Record failed operation
568 ///
569 /// May transition `Closed` -> `Open` or `HalfOpen` -> `Open` depending on
570 /// failure threshold configuration.
571 ///
572 /// # Example
573 ///
574 /// ```no_run
575 /// # use stygian_graph::ports::CircuitBreaker;
576 /// # fn example(cb: impl CircuitBreaker) {
577 /// // After failed API call
578 /// cb.record_failure();
579 /// # }
580 /// ```
581 fn record_failure(&self);
582
583 /// Attempt to reset circuit from `Open` to `HalfOpen`
584 ///
585 /// Called after timeout period to test if service recovered.
586 ///
587 /// # Returns
588 ///
589 /// * `true` - Successfully transitioned to `HalfOpen`
590 /// * `false` - Already in `Closed` or `HalfOpen` state
591 ///
592 /// # Example
593 ///
594 /// ```no_run
595 /// # use stygian_graph::ports::CircuitBreaker;
596 /// # fn example(cb: impl CircuitBreaker) {
597 /// if cb.attempt_reset() {
598 /// println!("Circuit breaker now in HalfOpen state");
599 /// }
600 /// # }
601 /// ```
602 fn attempt_reset(&self) -> bool;
603}
604
/// Rate limit configuration
///
/// Defines the rate limiting parameters using a token bucket approach:
/// at most `max_requests` requests are allowed per `window`.
///
/// The type is a small `Copy` value and can be compared with `==`, which
/// makes it easy to check whether a configuration has changed.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::RateLimitConfig;
/// use std::time::Duration;
///
/// // Allow 100 requests per minute
/// let config = RateLimitConfig {
///     max_requests: 100,
///     window: Duration::from_secs(60),
/// };
/// assert_eq!(config, config);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RateLimitConfig {
    /// Maximum number of requests allowed in the time window
    pub max_requests: u32,
    /// Time window for rate limiting
    pub window: std::time::Duration,
}
628
629/// Rate limiter port
630///
631/// Implements rate limiting to prevent overwhelming external services
632/// or exceeding API quotas. Supports per-key rate limiting for
633/// multi-tenant scenarios.
634///
635/// # Example Implementation
636///
637/// ```no_run
638/// use stygian_graph::ports::RateLimiter;
639/// use stygian_graph::domain::error::Result;
640/// use async_trait::async_trait;
641///
642/// struct MyRateLimiter;
643///
644/// #[async_trait]
645/// impl RateLimiter for MyRateLimiter {
646/// async fn check_rate_limit(&self, key: &str) -> Result<bool> {
647/// // Check if key is within rate limit
648/// Ok(true)
649/// }
650///
651/// async fn record_request(&self, key: &str) -> Result<()> {
652/// // Record request for rate limiting
653/// Ok(())
654/// }
655/// }
656/// ```
657#[async_trait]
658pub trait RateLimiter: Send + Sync {
659 /// Check if key is within rate limit
660 ///
661 /// # Arguments
662 ///
663 /// * `key` - Rate limit key (service name, API endpoint, user ID, etc.)
664 ///
665 /// # Returns
666 ///
667 /// * `Ok(true)` - Request allowed
668 /// * `Ok(false)` - Rate limit exceeded
669 /// * `Err(RateLimitError)` - Backend failure
670 ///
671 /// # Example
672 ///
673 /// ```no_run
674 /// # use stygian_graph::ports::RateLimiter;
675 /// # async fn example(limiter: impl RateLimiter) {
676 /// if limiter.check_rate_limit("api:openai").await.unwrap() {
677 /// println!("Request allowed");
678 /// } else {
679 /// println!("Rate limit exceeded, retry later");
680 /// }
681 /// # }
682 /// ```
683 async fn check_rate_limit(&self, key: &str) -> Result<bool>;
684
685 /// Record a request for rate limiting
686 ///
687 /// Should be called after successful operation to update the rate limit counter.
688 ///
689 /// # Arguments
690 ///
691 /// * `key` - Rate limit key
692 ///
693 /// # Returns
694 ///
695 /// * `Ok(())` - Request recorded
696 /// * `Err(RateLimitError)` - Backend failure
697 ///
698 /// # Example
699 ///
700 /// ```no_run
701 /// # use stygian_graph::ports::RateLimiter;
702 /// # async fn example(limiter: impl RateLimiter) {
703 /// // After making API call
704 /// limiter.record_request("api:openai").await.unwrap();
705 /// # }
706 /// ```
707 async fn record_request(&self, key: &str) -> Result<()>;
708}
709
710// ─────────────────────────────────────────────────────────────────────────────
711// GraphQL auth types
712// ─────────────────────────────────────────────────────────────────────────────
713
714/// Authentication strategy for a GraphQL request.
715///
716/// # Example
717///
718/// ```rust
719/// use stygian_graph::ports::{GraphQlAuth, GraphQlAuthKind};
720///
721/// let auth = GraphQlAuth {
722/// kind: GraphQlAuthKind::Bearer,
723/// token: "${env:MY_TOKEN}".to_string(),
724/// header_name: None,
725/// };
726/// ```
727#[derive(Debug, Clone)]
728pub struct GraphQlAuth {
729 /// The authentication strategy to apply
730 pub kind: GraphQlAuthKind,
731 /// The token value (supports `${env:VAR}` expansion)
732 pub token: String,
733 /// Custom header name (required when `kind == Header`)
734 pub header_name: Option<String>,
735}
736
/// Discriminant for `GraphQlAuth`
///
/// A fieldless enum naming the authentication strategy. It is `Copy`, so it
/// can be passed and compared by value without cloning.
///
/// Refer to the last variant by its full path, `GraphQlAuthKind::None`, to
/// keep it visually distinct from `Option::None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GraphQlAuthKind {
    /// `Authorization: Bearer <token>`
    Bearer,
    /// `X-Api-Key: <token>`
    ApiKey,
    /// Arbitrary header specified by `header_name`
    Header,
    /// No authentication
    None,
}
749
750/// GraphQL plugin sub-module
751pub mod graphql_plugin;
752
753/// Work queue port — distributed task execution
754pub mod work_queue;
755
756/// WASM plugin port — dynamic plugin loading
757pub mod wasm_plugin;
758
759/// Storage port — persist and retrieve pipeline results
760pub mod storage;
761
762/// Auth port — runtime token loading, expiry checking, and refresh.
763pub mod auth;
764
765/// Signing port — attach HMAC, OAuth 1.0a, AWS Sig V4, or Frida RPC signatures to requests.
766pub mod signing;
767
768/// Database source port — query databases as pipeline data sources
769pub mod data_source;
770
771/// Document source port — read files from the local file system
772pub mod document_source;
773
774/// Stream source port — consume SSE, WebSocket, or message queue feeds
775pub mod stream_source;
776
777/// Agent source port — use an LLM as a pipeline data source
778pub mod agent_source;
779
780/// Webhook trigger port — accept inbound HTTP webhooks that start pipelines
781pub mod webhook;
782
783/// Tiered request escalation port — decide when to escalate from plain HTTP to browser
784pub mod escalation;
785
786/// Data sink port — publish scraped records to an external system
787pub mod data_sink;