// stygian_graph/ports.rs

//! Port trait definitions - service abstractions
//!
//! Defines interfaces that adapters must implement.
//! Following hexagonal architecture, these are the "ports" that connect
//! domain logic to external infrastructure.
6
7use crate::domain::error::Result;
8use async_trait::async_trait;
9use serde_json::Value;
10
11/// Input to a scraping service
12///
13/// Contains the target URL and service-specific parameters that configure
14/// how the scraping operation should be performed.
15///
16/// # Example
17///
18/// ```
19/// use stygian_graph::ports::ServiceInput;
20/// use serde_json::json;
21///
22/// let input = ServiceInput {
23///     url: "https://example.com".to_string(),
24///     params: json!({
25///         "timeout_ms": 5000,
26///         "user_agent": "stygian/1.0"
27///     }),
28/// };
29/// ```
30#[derive(Debug, Clone)]
31pub struct ServiceInput {
32    /// Target URL to scrape
33    pub url: String,
34    /// Service-specific parameters (timeout, headers, etc.)
35    pub params: Value,
36}
37
38/// Output from a scraping service
39///
40/// Contains the raw scraped data and metadata about the operation
41/// for downstream processing and debugging.
42///
43/// # Example
44///
45/// ```
46/// use stygian_graph::ports::ServiceOutput;
47/// use serde_json::json;
48///
49/// let output = ServiceOutput {
50///     data: "<html>...</html>".to_string(),
51///     metadata: json!({
52///         "status_code": 200,
53///         "content_type": "text/html",
54///         "response_time_ms": 145
55///     }),
56/// };
57/// ```
58#[derive(Debug, Clone)]
59pub struct ServiceOutput {
60    /// Raw scraped data (HTML, JSON, binary, etc.)
61    pub data: String,
62    /// Metadata about the operation (status, timing, headers)
63    pub metadata: Value,
64}
65
66/// Primary port: `ScrapingService` trait
67///
68/// All scraping modules (HTTP, browser, JavaScript rendering) implement this trait.
69/// Uses `async_trait` for dyn compatibility with service registry.
70///
71/// # Example Implementation
72///
73/// ```no_run
74/// use stygian_graph::ports::{ScrapingService, ServiceInput, ServiceOutput};
75/// use stygian_graph::error::Result;
76/// use async_trait::async_trait;
77/// use serde_json::json;
78///
79/// struct MyService;
80///
81/// #[async_trait]
82/// impl ScrapingService for MyService {
83///     async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
84///         Ok(ServiceOutput {
85///             data: format!("Scraped: {}", input.url),
86///             metadata: json!({"status": "ok"}),
87///         })
88///     }
89///     
90///     fn name(&self) -> &'static str {
91///         "my-service"
92///     }
93/// }
94/// ```
95#[async_trait]
96pub trait ScrapingService: Send + Sync {
97    /// Execute the scraping operation
98    ///
99    /// # Arguments
100    ///
101    /// * `input` - Service input containing URL and parameters
102    ///
103    /// # Returns
104    ///
105    /// * `Ok(ServiceOutput)` - Successful scraping result
106    /// * `Err(StygianError)` - Service error, timeout, or network failure
107    ///
108    /// # Example
109    ///
110    /// ```no_run
111    /// # use stygian_graph::ports::{ScrapingService, ServiceInput};
112    /// # use serde_json::json;
113    /// # async fn example(service: impl ScrapingService) {
114    /// let input = ServiceInput {
115    ///     url: "https://example.com".to_string(),
116    ///     params: json!({}),
117    /// };
118    /// let output = service.execute(input).await.unwrap();
119    /// println!("Data: {}", output.data);
120    /// # }
121    /// ```
122    async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput>;
123
124    /// Service name for identification in logs and metrics
125    ///
126    /// # Example
127    ///
128    /// ```no_run
129    /// # use stygian_graph::ports::ScrapingService;
130    /// # fn example(service: impl ScrapingService) {
131    /// println!("Using service: {}", service.name());
132    /// # }
133    /// ```
134    fn name(&self) -> &'static str;
135}
136
/// Provider capability flags
///
/// Describes the capabilities supported by an AI provider.
/// The derived `Default` leaves every capability disabled.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::ProviderCapabilities;
///
/// let caps = ProviderCapabilities {
///     streaming: true,
///     vision: false,
///     tool_use: true,
///     json_mode: true,
/// };
/// assert!(caps.streaming, "Provider supports streaming");
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[allow(clippy::struct_excessive_bools)] // 4 capability flags are clearer as named bools than a u8 bitmask
pub struct ProviderCapabilities {
    /// Supports streaming responses
    pub streaming: bool,
    /// Supports image/video analysis
    pub vision: bool,
    /// Supports function calling/tool use
    pub tool_use: bool,
    /// Native JSON output mode
    pub json_mode: bool,
}
166
167/// AI Provider port for LLM-based extraction
168///
169/// All LLM providers (Claude, GPT, Gemini, GitHub Copilot, Ollama) implement this trait.
170/// Uses `async_trait` for dyn compatibility with service registry.
171///
172/// # Example Implementation
173///
174/// ```no_run
175/// use stygian_graph::ports::{AIProvider, ProviderCapabilities};
176/// use stygian_graph::domain::error::Result;
177/// use async_trait::async_trait;
178/// use serde_json::{json, Value};
179/// use futures::stream::{Stream, BoxStream};
180///
181/// struct MyProvider;
182///
183/// #[async_trait]
184/// impl AIProvider for MyProvider {
185///     async fn extract(&self, content: String, schema: Value) -> Result<Value> {
186///         Ok(json!({"extracted": "data"}))
187///     }
188///
189///     async fn stream_extract(
190///         &self,
191///         content: String,
192///         schema: Value,
193///     ) -> Result<BoxStream<'static, Result<Value>>> {
194///         unimplemented!("Streaming not supported")
195///     }
196///
197///     fn capabilities(&self) -> ProviderCapabilities {
198///         ProviderCapabilities {
199///             streaming: false,
200///             vision: false,
201///             tool_use: true,
202///             json_mode: true,
203///         }
204///     }
205///
206///     fn name(&self) -> &'static str {
207///         "my-provider"
208///     }
209/// }
210/// ```
211#[async_trait]
212pub trait AIProvider: Send + Sync {
213    /// Extract structured data from content using LLM
214    ///
215    /// # Arguments
216    ///
217    /// * `content` - Raw content to analyze (text, HTML, etc.)
218    /// * `schema` - JSON schema defining expected output structure
219    ///
220    /// # Returns
221    ///
222    /// * `Ok(Value)` - Extracted data matching the schema
223    /// * `Err(ProviderError)` - API error, token limit, or policy violation
224    ///
225    /// # Example
226    ///
227    /// ```no_run
228    /// # use stygian_graph::ports::AIProvider;
229    /// # use serde_json::json;
230    /// # async fn example(provider: impl AIProvider) {
231    /// let schema = json!({
232    ///     "type": "object",
233    ///     "properties": {
234    ///         "title": {"type": "string"},
235    ///         "price": {"type": "number"}
236    ///     }
237    /// });
238    /// let result = provider.extract("<html>...</html>".to_string(), schema).await.unwrap();
239    /// println!("Extracted: {}", result);
240    /// # }
241    /// ```
242    async fn extract(&self, content: String, schema: Value) -> Result<Value>;
243
244    /// Stream structured data extraction for real-time processing
245    ///
246    /// Returns a stream of partial results as they arrive from the LLM.
247    /// Only supported by providers with `capabilities().streaming == true`.
248    ///
249    /// # Arguments
250    ///
251    /// * `content` - Raw content to analyze
252    /// * `schema` - JSON schema defining expected output structure
253    ///
254    /// # Returns
255    ///
256    /// * `Ok(BoxStream)` - Stream of partial extraction results
257    /// * `Err(ProviderError)` - If streaming is not supported or API error
258    ///
259    /// # Example
260    ///
261    /// ```no_run
262    /// # use stygian_graph::ports::AIProvider;
263    /// # use serde_json::json;
264    /// # use futures::StreamExt;
265    /// # async fn example(provider: impl AIProvider) {
266    /// let schema = json!({"type": "object"});
267    /// let mut stream = provider.stream_extract("content".to_string(), schema).await.unwrap();
268    /// while let Some(result) = stream.next().await {
269    ///     match result {
270    ///         Ok(partial) => println!("Chunk: {}", partial),
271    ///         Err(e) => eprintln!("Stream error: {}", e),
272    ///     }
273    /// }
274    /// # }
275    /// ```
276    async fn stream_extract(
277        &self,
278        content: String,
279        schema: Value,
280    ) -> Result<futures::stream::BoxStream<'static, Result<Value>>>;
281
282    /// Get provider capabilities
283    ///
284    /// Returns a struct describing what features this provider supports.
285    ///
286    /// # Example
287    ///
288    /// ```no_run
289    /// # use stygian_graph::ports::AIProvider;
290    /// # fn example(provider: impl AIProvider) {
291    /// let caps = provider.capabilities();
292    /// if caps.streaming {
293    ///     println!("Provider supports streaming");
294    /// }
295    /// if caps.vision {
296    ///     println!("Provider supports image analysis");
297    /// }
298    /// # }
299    /// ```
300    fn capabilities(&self) -> ProviderCapabilities;
301
302    /// Provider name (claude, gpt, gemini, etc.)
303    ///
304    /// # Example
305    ///
306    /// ```no_run
307    /// # use stygian_graph::ports::AIProvider;
308    /// # fn example(provider: impl AIProvider) {
309    /// println!("Using provider: {}", provider.name());
310    /// # }
311    /// ```
312    fn name(&self) -> &'static str;
313}
314
315/// Cache port for storing/retrieving data with idempotency key support
316///
317/// Provides a key-value store interface for caching scraped content,
318/// API responses, and idempotency tracking.
319///
320/// # Example Implementation
321///
322/// ```no_run
323/// use stygian_graph::ports::CachePort;
324/// use stygian_graph::domain::error::Result;
325/// use async_trait::async_trait;
326/// use std::time::Duration;
327///
328/// struct MyCache;
329///
330/// #[async_trait]
331/// impl CachePort for MyCache {
332///     async fn get(&self, key: &str) -> Result<Option<String>> {
333///         // Fetch from cache backend
334///         Ok(Some("cached_value".to_string()))
335///     }
336///
337///     async fn set(&self, key: &str, value: String, ttl: Option<Duration>) -> Result<()> {
338///         // Store in cache backend
339///         Ok(())
340///     }
341///
342///     async fn invalidate(&self, key: &str) -> Result<()> {
343///         // Remove from cache
344///         Ok(())
345///     }
346///
347///     async fn exists(&self, key: &str) -> Result<bool> {
348///         // Check existence
349///         Ok(true)
350///     }
351/// }
352/// ```
353#[async_trait]
354pub trait CachePort: Send + Sync {
355    /// Get value from cache
356    ///
357    /// # Arguments
358    ///
359    /// * `key` - Cache key (URL, idempotency key, etc.)
360    ///
361    /// # Returns
362    ///
363    /// * `Ok(Some(String))` - Cache hit
364    /// * `Ok(None)` - Cache miss
365    /// * `Err(CacheError)` - Backend failure
366    ///
367    /// # Example
368    ///
369    /// ```no_run
370    /// # use stygian_graph::ports::CachePort;
371    /// # async fn example(cache: impl CachePort) {
372    /// match cache.get("page:123").await {
373    ///     Ok(Some(content)) => println!("Cache hit: {}", content),
374    ///     Ok(None) => println!("Cache miss"),
375    ///     Err(e) => eprintln!("Cache error: {}", e),
376    /// }
377    /// # }
378    /// ```
379    async fn get(&self, key: &str) -> Result<Option<String>>;
380
381    /// Set value in cache with optional TTL
382    ///
383    /// # Arguments
384    ///
385    /// * `key` - Cache key
386    /// * `value` - Value to store (JSON, HTML, etc.)
387    /// * `ttl` - Optional expiration duration
388    ///
389    /// # Returns
390    ///
391    /// * `Ok(())` - Successfully stored
392    /// * `Err(CacheError)` - Backend failure
393    ///
394    /// # Example
395    ///
396    /// ```no_run
397    /// # use stygian_graph::ports::CachePort;
398    /// # use std::time::Duration;
399    /// # async fn example(cache: impl CachePort) {
400    /// cache.set(
401    ///     "page:123",
402    ///     "<html>...</html>".to_string(),
403    ///     Some(Duration::from_secs(3600))
404    /// ).await.unwrap();
405    /// # }
406    /// ```
407    async fn set(&self, key: &str, value: String, ttl: Option<std::time::Duration>) -> Result<()>;
408
409    /// Invalidate cache entry
410    ///
411    /// # Arguments
412    ///
413    /// * `key` - Cache key to invalidate
414    ///
415    /// # Returns
416    ///
417    /// * `Ok(())` - Successfully invalidated (or key didn't exist)
418    /// * `Err(CacheError)` - Backend failure
419    ///
420    /// # Example
421    ///
422    /// ```no_run
423    /// # use stygian_graph::ports::CachePort;
424    /// # async fn example(cache: impl CachePort) {
425    /// cache.invalidate("page:123").await.unwrap();
426    /// # }
427    /// ```
428    async fn invalidate(&self, key: &str) -> Result<()>;
429
430    /// Check if key exists in cache
431    ///
432    /// # Arguments
433    ///
434    /// * `key` - Cache key to check
435    ///
436    /// # Returns
437    ///
438    /// * `Ok(true)` - Key exists
439    /// * `Ok(false)` - Key does not exist or expired
440    /// * `Err(CacheError)` - Backend failure
441    ///
442    /// # Example
443    ///
444    /// ```no_run
445    /// # use stygian_graph::ports::CachePort;
446    /// # async fn example(cache: impl CachePort) {
447    /// if cache.exists("page:123").await.unwrap() {
448    ///     println!("Page is cached");
449    /// }
450    /// # }
451    /// ```
452    async fn exists(&self, key: &str) -> Result<bool>;
453}
454
/// Circuit breaker state
///
/// Represents the current state of a circuit breaker following
/// the standard circuit breaker pattern.
///
/// # State Transitions
///
/// ```text
/// Closed ---(too many failures)---> Open
/// Open -----(timeout elapsed)-----> HalfOpen
/// HalfOpen --(success)-----------> Closed
/// HalfOpen --(failure)-----------> Open
/// ```
///
/// # Example
///
/// ```
/// use stygian_graph::ports::CircuitState;
///
/// let state = CircuitState::Closed;
/// assert!(matches!(state, CircuitState::Closed));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
    /// Normal operation, requests pass through
    Closed,
    /// Circuit is open, requests fail fast
    Open,
    /// Testing if service recovered, limited requests allowed
    HalfOpen,
}
486
487/// Circuit breaker port for resilience
488///
489/// Implements the circuit breaker pattern to prevent cascading failures
490/// when external services are unavailable. Uses interior mutability
491/// for state management.
492///
493/// # Example Implementation
494///
495/// ```no_run
496/// use stygian_graph::ports::{CircuitBreaker, CircuitState};
497/// use stygian_graph::domain::error::Result;
498/// use parking_lot::RwLock;
499/// use std::sync::Arc;
500///
501/// struct MyCircuitBreaker {
502///     state: Arc<RwLock<CircuitState>>,
503/// }
504///
505/// impl CircuitBreaker for MyCircuitBreaker {
506///     fn state(&self) -> CircuitState {
507///         *self.state.read()
508///     }
509///
510///     fn record_success(&self) {
511///         let mut state = self.state.write();
512///         *state = CircuitState::Closed;
513///     }
514///
515///     fn record_failure(&self) {
516///         let mut state = self.state.write();
517///         *state = CircuitState::Open;
518///     }
519///
520///     fn attempt_reset(&self) -> bool {
521///         let mut state = self.state.write();
522///         if matches!(*state, CircuitState::Open) {
523///             *state = CircuitState::HalfOpen;
524///             true
525///         } else {
526///             false
527///         }
528///     }
529/// }
530/// ```
531pub trait CircuitBreaker: Send + Sync {
532    /// Get current circuit breaker state
533    ///
534    /// # Returns
535    ///
536    /// Current state (`Closed`, `Open`, or `HalfOpen`)
537    ///
538    /// # Example
539    ///
540    /// ```no_run
541    /// # use stygian_graph::ports::{CircuitBreaker, CircuitState};
542    /// # fn example(cb: impl CircuitBreaker) {
543    /// match cb.state() {
544    ///     CircuitState::Closed => println!("Normal operation"),
545    ///     CircuitState::Open => println!("Circuit is open, failing fast"),
546    ///     CircuitState::HalfOpen => println!("Testing recovery"),
547    /// }
548    /// # }
549    /// ```
550    fn state(&self) -> CircuitState;
551
552    /// Record successful operation
553    ///
554    /// Transitions `HalfOpen` -> `Closed`, maintains `Closed` state.
555    ///
556    /// # Example
557    ///
558    /// ```no_run
559    /// # use stygian_graph::ports::CircuitBreaker;
560    /// # fn example(cb: impl CircuitBreaker) {
561    /// // After successful API call
562    /// cb.record_success();
563    /// # }
564    /// ```
565    fn record_success(&self);
566
567    /// Record failed operation
568    ///
569    /// May transition `Closed` -> `Open` or `HalfOpen` -> `Open` depending on
570    /// failure threshold configuration.
571    ///
572    /// # Example
573    ///
574    /// ```no_run
575    /// # use stygian_graph::ports::CircuitBreaker;
576    /// # fn example(cb: impl CircuitBreaker) {
577    /// // After failed API call
578    /// cb.record_failure();
579    /// # }
580    /// ```
581    fn record_failure(&self);
582
583    /// Attempt to reset circuit from `Open` to `HalfOpen`
584    ///
585    /// Called after timeout period to test if service recovered.
586    ///
587    /// # Returns
588    ///
589    /// * `true` - Successfully transitioned to `HalfOpen`
590    /// * `false` - Already in `Closed` or `HalfOpen` state
591    ///
592    /// # Example
593    ///
594    /// ```no_run
595    /// # use stygian_graph::ports::CircuitBreaker;
596    /// # fn example(cb: impl CircuitBreaker) {
597    /// if cb.attempt_reset() {
598    ///     println!("Circuit breaker now in HalfOpen state");
599    /// }
600    /// # }
601    /// ```
602    fn attempt_reset(&self) -> bool;
603}
604
/// Rate limit configuration
///
/// Defines the rate limiting parameters using a token bucket approach.
///
/// # Example
///
/// ```
/// use stygian_graph::ports::RateLimitConfig;
/// use std::time::Duration;
///
/// // Allow 100 requests per minute
/// let config = RateLimitConfig {
///     max_requests: 100,
///     window: Duration::from_secs(60),
/// };
/// ```
#[derive(Debug, Clone, Copy)]
pub struct RateLimitConfig {
    /// Maximum number of requests allowed in the time window
    pub max_requests: u32,
    /// Time window for rate limiting
    pub window: std::time::Duration,
}
628
629/// Rate limiter port
630///
631/// Implements rate limiting to prevent overwhelming external services
632/// or exceeding API quotas. Supports per-key rate limiting for
633/// multi-tenant scenarios.
634///
635/// # Example Implementation
636///
637/// ```no_run
638/// use stygian_graph::ports::RateLimiter;
639/// use stygian_graph::domain::error::Result;
640/// use async_trait::async_trait;
641///
642/// struct MyRateLimiter;
643///
644/// #[async_trait]
645/// impl RateLimiter for MyRateLimiter {
646///     async fn check_rate_limit(&self, key: &str) -> Result<bool> {
647///         // Check if key is within rate limit
648///         Ok(true)
649///     }
650///
651///     async fn record_request(&self, key: &str) -> Result<()> {
652///         // Record request for rate limiting
653///         Ok(())
654///     }
655/// }
656/// ```
657#[async_trait]
658pub trait RateLimiter: Send + Sync {
659    /// Check if key is within rate limit
660    ///
661    /// # Arguments
662    ///
663    /// * `key` - Rate limit key (service name, API endpoint, user ID, etc.)
664    ///
665    /// # Returns
666    ///
667    /// * `Ok(true)` - Request allowed
668    /// * `Ok(false)` - Rate limit exceeded
669    /// * `Err(RateLimitError)` - Backend failure
670    ///
671    /// # Example
672    ///
673    /// ```no_run
674    /// # use stygian_graph::ports::RateLimiter;
675    /// # async fn example(limiter: impl RateLimiter) {
676    /// if limiter.check_rate_limit("api:openai").await.unwrap() {
677    ///     println!("Request allowed");
678    /// } else {
679    ///     println!("Rate limit exceeded, retry later");
680    /// }
681    /// # }
682    /// ```
683    async fn check_rate_limit(&self, key: &str) -> Result<bool>;
684
685    /// Record a request for rate limiting
686    ///
687    /// Should be called after successful operation to update the rate limit counter.
688    ///
689    /// # Arguments
690    ///
691    /// * `key` - Rate limit key
692    ///
693    /// # Returns
694    ///
695    /// * `Ok(())` - Request recorded
696    /// * `Err(RateLimitError)` - Backend failure
697    ///
698    /// # Example
699    ///
700    /// ```no_run
701    /// # use stygian_graph::ports::RateLimiter;
702    /// # async fn example(limiter: impl RateLimiter) {
703    /// // After making API call
704    /// limiter.record_request("api:openai").await.unwrap();
705    /// # }
706    /// ```
707    async fn record_request(&self, key: &str) -> Result<()>;
708}
709
// ─────────────────────────────────────────────────────────────────────────────
// GraphQL auth types
// ─────────────────────────────────────────────────────────────────────────────
713
714/// Authentication strategy for a GraphQL request.
715///
716/// # Example
717///
718/// ```rust
719/// use stygian_graph::ports::{GraphQlAuth, GraphQlAuthKind};
720///
721/// let auth = GraphQlAuth {
722///     kind: GraphQlAuthKind::Bearer,
723///     token: "${env:MY_TOKEN}".to_string(),
724///     header_name: None,
725/// };
726/// ```
727#[derive(Debug, Clone)]
728pub struct GraphQlAuth {
729    /// The authentication strategy to apply
730    pub kind: GraphQlAuthKind,
731    /// The token value (supports `${env:VAR}` expansion)
732    pub token: String,
733    /// Custom header name (required when `kind == Header`)
734    pub header_name: Option<String>,
735}
736
/// Discriminant for `GraphQlAuth`
///
/// Selects which header, if any, carries the token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GraphQlAuthKind {
    /// `Authorization: Bearer <token>`
    Bearer,
    /// `X-Api-Key: <token>`
    ApiKey,
    /// Arbitrary header specified by `header_name`
    Header,
    /// No authentication
    None,
}
749
750/// GraphQL plugin sub-module
751pub mod graphql_plugin;
752
753/// Work queue port — distributed task execution
754pub mod work_queue;
755
756/// WASM plugin port — dynamic plugin loading
757pub mod wasm_plugin;
758
759/// Storage port — persist and retrieve pipeline results
760pub mod storage;
761
762/// Auth port — runtime token loading, expiry checking, and refresh.
763pub mod auth;
764
765/// Signing port — attach HMAC, OAuth 1.0a, AWS Sig V4, or Frida RPC signatures to requests.
766pub mod signing;
767
768/// Database source port — query databases as pipeline data sources
769pub mod data_source;
770
771/// Document source port — read files from the local file system
772pub mod document_source;
773
774/// Stream source port — consume SSE, WebSocket, or message queue feeds
775pub mod stream_source;
776
777/// Agent source port — use an LLM as a pipeline data source
778pub mod agent_source;
779
780/// Webhook trigger port — accept inbound HTTP webhooks that start pipelines
781pub mod webhook;
782
783/// Tiered request escalation port — decide when to escalate from plain HTTP to browser
784pub mod escalation;
785
786/// Data sink port — publish scraped records to an external system
787pub mod data_sink;