converge_core/
backend.rs

1// Copyright 2024-2025 Aprio One AB, Sweden
2// Author: Kenneth Pernyer, kenneth@aprio.one
3// SPDX-License-Identifier: LicenseRef-Proprietary
4
5//! LLM Backend Interface — The unification boundary for local and remote LLMs.
6//!
7//! # The Unification Rule
8//!
9//! All model invocations—local or remote—must produce the same top-level artifact:
10//! - `BackendResponse` containing `ProposedContent`(s)
11//! - Plus a `TraceLink` that makes the invocation auditable, budgeted, and comparable
12//!
13//! "Interchangeable" means:
14//! - Same request type
15//! - Same output type
16//! - Same contract evaluation surface
17//! - Different execution backend
18//!
19//! # Determinism Guarantees
20//!
21//! | Backend | Determinism | TraceLink |
22//! |---------|-------------|-----------|
23//! | Local (converge-llm) | Strong (replay-eligible) | `LocalTraceLink` |
24//! | Remote (providers) | Bounded stochasticity (audit-eligible) | `RemoteTraceLink` |
25//!
26//! Remote runs are:
27//! - **Auditable**: Full request/response + metadata
28//! - **Repeatable-ish**: Best effort (temp=0 helps)
29//! - **Non-replayable**: Strictly (model versions, safety layers can shift)
30//!
31//! # Example
32//!
33//! ```
34//! use converge_core::backend::{
35//!     LlmBackend, BackendCapability, BackendRequest, BackendResponse,
36//!     BackendPrompt, BackendBudgets, BackendResult,
37//! };
38//!
39//! // Both local and remote backends implement the same trait
40//! fn process_with_any_backend<B: LlmBackend>(
41//!     backend: &B,
42//!     request: &BackendRequest,
43//! ) -> BackendResult<BackendResponse> {
44//!     // Check capabilities first
45//!     if backend.supports_capability(BackendCapability::Replay) {
46//!         println!("Using replay-eligible backend: {}", backend.name());
47//!     }
48//!     backend.execute(request)
49//! }
50//! ```
51
52use serde::{Deserialize, Serialize};
53
54use crate::kernel_boundary::{ProposedContent, TraceLink};
55
56// ============================================================================
57// Backend Error
58// ============================================================================
59
60/// Error type for backend operations.
61///
62/// This is capability-agnostic - implementations can wrap their specific errors.
63///
64/// # Retryable Errors
65///
66/// Some errors are transient and can be retried:
67/// - `Timeout` - operation exceeded deadline but might succeed on retry
68/// - `Unavailable` - backend temporarily unavailable
69/// - `ExecutionFailed` - if caused by transient infrastructure issues
70///
71/// Use `is_retryable()` to check if an error should trigger retry logic.
72#[derive(Debug, Clone, PartialEq, Eq)]
73pub enum BackendError {
74    /// Request validation failed (NOT retryable - fix the request)
75    InvalidRequest { message: String },
76    /// Backend execution failed (may be retryable depending on cause)
77    ExecutionFailed { message: String },
78    /// Backend is unavailable (retryable - try again later)
79    Unavailable { message: String },
80    /// Budget exceeded (NOT retryable - increase budget or reduce request)
81    BudgetExceeded { resource: String, limit: String },
82    /// Contract validation failed (NOT retryable - output doesn't match contract)
83    ContractFailed { contract: String, message: String },
84    /// Capability not supported (NOT retryable - use different backend)
85    UnsupportedCapability { capability: BackendCapability },
86    /// Adapter not found or incompatible (NOT retryable - fix configuration)
87    AdapterError { message: String },
88    /// Recall operation failed (may be retryable)
89    RecallError { message: String },
90    /// Operation timed out (retryable - might succeed with more time)
91    Timeout {
92        /// Configured deadline in milliseconds
93        deadline_ms: u64,
94        /// Actual elapsed time in milliseconds
95        elapsed_ms: u64,
96    },
97    /// Circuit breaker is open (NOT retryable until circuit closes)
98    CircuitOpen {
99        /// Name of the backend with open circuit
100        backend: String,
101        /// When the circuit will transition to half-open (Unix timestamp ms)
102        retry_after_ms: Option<u64>,
103    },
104    /// Retryable wrapper - indicates retry was attempted
105    Retried {
106        /// The final error after all retries exhausted
107        message: String,
108        /// Number of attempts made
109        attempts: usize,
110        /// Whether the underlying error was transient
111        was_transient: bool,
112    },
113    /// Generic error with context
114    Other { message: String },
115}
116
117impl std::fmt::Display for BackendError {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        match self {
120            Self::InvalidRequest { message } => write!(f, "Invalid request: {}", message),
121            Self::ExecutionFailed { message } => write!(f, "Execution failed: {}", message),
122            Self::Unavailable { message } => write!(f, "Backend unavailable: {}", message),
123            Self::BudgetExceeded { resource, limit } => {
124                write!(f, "Budget exceeded: {} (limit: {})", resource, limit)
125            }
126            Self::ContractFailed { contract, message } => {
127                write!(f, "Contract '{}' failed: {}", contract, message)
128            }
129            Self::UnsupportedCapability { capability } => {
130                write!(f, "Unsupported capability: {:?}", capability)
131            }
132            Self::AdapterError { message } => write!(f, "Adapter error: {}", message),
133            Self::RecallError { message } => write!(f, "Recall error: {}", message),
134            Self::Timeout {
135                deadline_ms,
136                elapsed_ms,
137            } => {
138                write!(
139                    f,
140                    "Operation timed out: elapsed {}ms, deadline {}ms",
141                    elapsed_ms, deadline_ms
142                )
143            }
144            Self::CircuitOpen {
145                backend,
146                retry_after_ms,
147            } => {
148                if let Some(retry_after) = retry_after_ms {
149                    write!(
150                        f,
151                        "Circuit breaker open for '{}', retry after {}ms",
152                        backend, retry_after
153                    )
154                } else {
155                    write!(f, "Circuit breaker open for '{}'", backend)
156                }
157            }
158            Self::Retried {
159                message,
160                attempts,
161                was_transient,
162            } => {
163                write!(
164                    f,
165                    "Failed after {} attempts (transient: {}): {}",
166                    attempts, was_transient, message
167                )
168            }
169            Self::Other { message } => write!(f, "{}", message),
170        }
171    }
172}
173
174impl std::error::Error for BackendError {}
175
176impl BackendError {
177    /// Check if this error is retryable.
178    ///
179    /// Retryable errors are transient and might succeed on retry:
180    /// - Timeout
181    /// - Unavailable
182    /// - Some ExecutionFailed (network issues, rate limits)
183    ///
184    /// Non-retryable errors require intervention:
185    /// - InvalidRequest (fix the request)
186    /// - BudgetExceeded (increase budget)
187    /// - ContractFailed (output doesn't match)
188    /// - UnsupportedCapability (use different backend)
189    /// - CircuitOpen (wait for circuit to close)
190    #[must_use]
191    pub fn is_retryable(&self) -> bool {
192        match self {
193            Self::Timeout { .. } => true,
194            Self::Unavailable { .. } => true,
195            Self::ExecutionFailed { message } => {
196                // Heuristic: network/rate limit errors are retryable
197                let msg_lower = message.to_lowercase();
198                msg_lower.contains("timeout")
199                    || msg_lower.contains("rate limit")
200                    || msg_lower.contains("429")
201                    || msg_lower.contains("503")
202                    || msg_lower.contains("502")
203                    || msg_lower.contains("504")
204                    || msg_lower.contains("connection")
205                    || msg_lower.contains("network")
206            }
207            Self::RecallError { message } => {
208                // Recall errors might be transient (embedding service down)
209                let msg_lower = message.to_lowercase();
210                msg_lower.contains("timeout") || msg_lower.contains("unavailable")
211            }
212            // Not retryable
213            Self::InvalidRequest { .. } => false,
214            Self::BudgetExceeded { .. } => false,
215            Self::ContractFailed { .. } => false,
216            Self::UnsupportedCapability { .. } => false,
217            Self::AdapterError { .. } => false,
218            Self::CircuitOpen { .. } => false, // Must wait for circuit to close
219            Self::Retried { .. } => false,     // Already retried
220            Self::Other { .. } => false,
221        }
222    }
223
224    /// Check if this error indicates the backend is overloaded.
225    ///
226    /// Used by circuit breakers to track failure patterns.
227    #[must_use]
228    pub fn is_overload(&self) -> bool {
229        match self {
230            Self::Unavailable { .. } => true,
231            Self::Timeout { .. } => true,
232            Self::ExecutionFailed { message } => {
233                let msg_lower = message.to_lowercase();
234                msg_lower.contains("rate limit")
235                    || msg_lower.contains("429")
236                    || msg_lower.contains("503")
237                    || msg_lower.contains("overloaded")
238            }
239            _ => false,
240        }
241    }
242}
243
244/// Result type for backend operations.
245pub type BackendResult<T> = Result<T, BackendError>;
246
247// ============================================================================
248// Backend Capability
249// ============================================================================
250
251/// Backend capabilities for routing decisions.
252///
253/// These capabilities determine what a backend can do and influence
254/// which backend is selected for a given request.
255#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
256pub enum BackendCapability {
257    /// Deterministic replay - same inputs produce identical outputs
258    Replay,
259    /// LoRA adapters for task-specific tuning
260    Adapters,
261    /// Recall injection from corpus
262    Recall,
263    /// Step-level contract validation
264    StepContracts,
265    /// Frontier reasoning capabilities (Claude Opus, GPT-4, etc.)
266    FrontierReasoning,
267    /// Fast iteration for interactive use
268    FastIteration,
269    /// Offline operation (no network required)
270    Offline,
271    /// Streaming output
272    Streaming,
273    /// Vision/multimodal input
274    Vision,
275    /// Tool use / function calling
276    ToolUse,
277}
278
279// ============================================================================
280// Retry Policy
281// ============================================================================
282
283/// Backoff strategy for retries.
284#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
285pub enum BackoffStrategy {
286    /// Fixed delay between retries
287    Fixed,
288    /// Linear increase: delay * attempt
289    Linear,
290    /// Exponential increase: delay * 2^attempt
291    Exponential,
292}
293
294impl Default for BackoffStrategy {
295    fn default() -> Self {
296        Self::Exponential
297    }
298}
299
300/// Configuration for retry behavior.
301///
302/// # Example
303///
304/// ```
305/// use converge_core::backend::{RetryPolicy, BackoffStrategy};
306///
307/// // Retry up to 3 times with exponential backoff starting at 100ms
308/// let policy = RetryPolicy {
309///     max_attempts: 3,
310///     initial_delay_ms: 100,
311///     max_delay_ms: 5000,
312///     backoff: BackoffStrategy::Exponential,
313///     jitter_percent: 20,
314/// };
315///
316/// assert_eq!(policy.delay_for_attempt(1), 100); // First retry: 100ms
317/// // Second retry: ~200ms, Third retry: ~400ms (plus jitter)
318/// ```
319#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
320pub struct RetryPolicy {
321    /// Maximum number of attempts (including initial attempt)
322    pub max_attempts: usize,
323    /// Initial delay between retries in milliseconds
324    pub initial_delay_ms: u64,
325    /// Maximum delay cap in milliseconds
326    pub max_delay_ms: u64,
327    /// Backoff strategy
328    pub backoff: BackoffStrategy,
329    /// Jitter percentage (0-100) to add randomness to delays
330    pub jitter_percent: u8,
331}
332
333impl Default for RetryPolicy {
334    fn default() -> Self {
335        Self {
336            max_attempts: 3,
337            initial_delay_ms: 100,
338            max_delay_ms: 10_000,
339            backoff: BackoffStrategy::Exponential,
340            jitter_percent: 20,
341        }
342    }
343}
344
345impl RetryPolicy {
346    /// Create a policy that never retries.
347    #[must_use]
348    pub fn no_retry() -> Self {
349        Self {
350            max_attempts: 1,
351            ..Default::default()
352        }
353    }
354
355    /// Create an aggressive retry policy for critical operations.
356    #[must_use]
357    pub fn aggressive() -> Self {
358        Self {
359            max_attempts: 5,
360            initial_delay_ms: 50,
361            max_delay_ms: 30_000,
362            backoff: BackoffStrategy::Exponential,
363            jitter_percent: 25,
364        }
365    }
366
367    /// Calculate the delay for a given attempt number (1-indexed).
368    ///
369    /// Does not include jitter - caller should add jitter separately.
370    #[must_use]
371    pub fn delay_for_attempt(&self, attempt: usize) -> u64 {
372        if attempt == 0 {
373            return 0;
374        }
375        let attempt = attempt.saturating_sub(1); // Convert to 0-indexed for calculation
376
377        let delay = match self.backoff {
378            BackoffStrategy::Fixed => self.initial_delay_ms,
379            BackoffStrategy::Linear => self.initial_delay_ms.saturating_mul(attempt as u64 + 1),
380            BackoffStrategy::Exponential => {
381                self.initial_delay_ms.saturating_mul(1u64 << attempt.min(10))
382            }
383        };
384
385        delay.min(self.max_delay_ms)
386    }
387
388    /// Check if another attempt should be made.
389    #[must_use]
390    pub fn should_retry(&self, attempt: usize) -> bool {
391        attempt < self.max_attempts
392    }
393}
394
395// ============================================================================
396// Circuit Breaker
397// ============================================================================
398
399/// Configuration for circuit breaker behavior.
400///
401/// Circuit breakers protect against cascading failures by stopping requests
402/// to a failing backend until it recovers.
403///
404/// # States
405///
406/// - **Closed**: Normal operation, requests pass through
407/// - **Open**: Backend is failing, requests fast-fail immediately
408/// - **Half-Open**: Testing if backend recovered, limited requests allowed
409///
410/// # Example
411///
412/// ```
413/// use converge_core::backend::CircuitBreakerConfig;
414///
415/// let config = CircuitBreakerConfig {
416///     failure_threshold: 5,
417///     success_threshold: 2,
418///     timeout_ms: 30_000,
419///     half_open_max_requests: 3,
420/// };
421/// ```
422#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
423pub struct CircuitBreakerConfig {
424    /// Number of failures before circuit opens
425    pub failure_threshold: usize,
426    /// Number of successes in half-open state before closing
427    pub success_threshold: usize,
428    /// Time in milliseconds before transitioning from open to half-open
429    pub timeout_ms: u64,
430    /// Maximum requests allowed in half-open state
431    pub half_open_max_requests: usize,
432}
433
434impl Default for CircuitBreakerConfig {
435    fn default() -> Self {
436        Self {
437            failure_threshold: 5,
438            success_threshold: 2,
439            timeout_ms: 30_000,
440            half_open_max_requests: 3,
441        }
442    }
443}
444
445impl CircuitBreakerConfig {
446    /// Create a sensitive circuit breaker that opens quickly.
447    #[must_use]
448    pub fn sensitive() -> Self {
449        Self {
450            failure_threshold: 3,
451            success_threshold: 1,
452            timeout_ms: 15_000,
453            half_open_max_requests: 1,
454        }
455    }
456
457    /// Create a tolerant circuit breaker that allows more failures.
458    #[must_use]
459    pub fn tolerant() -> Self {
460        Self {
461            failure_threshold: 10,
462            success_threshold: 3,
463            timeout_ms: 60_000,
464            half_open_max_requests: 5,
465        }
466    }
467
468    /// Disable circuit breaker (never opens).
469    #[must_use]
470    pub fn disabled() -> Self {
471        Self {
472            failure_threshold: usize::MAX,
473            success_threshold: 1,
474            timeout_ms: 0,
475            half_open_max_requests: usize::MAX,
476        }
477    }
478}
479
/// The state a circuit breaker is in at a given moment.
///
/// Unlike [`CircuitBreakerConfig`], this is runtime state. Implementations
/// are expected to keep one per backend. The default state is `Closed`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CircuitState {
    /// Requests pass through normally
    #[default]
    Closed,
    /// The backend is failing, so requests fast-fail
    Open,
    /// Recovery is being probed with a limited number of requests
    HalfOpen,
}
494
495// ============================================================================
496// Backend Request
497// ============================================================================
498
499/// A unified request to any LLM backend.
500///
501/// Both local kernel and remote providers accept this same request type.
502#[derive(Debug, Clone, Serialize, Deserialize)]
503pub struct BackendRequest {
504    /// Intent identifier for tracking
505    pub intent_id: String,
506    /// Truth targets this invocation aims to satisfy
507    pub truth_ids: Vec<String>,
508    /// Prompt version for reproducibility
509    pub prompt_version: String,
510    /// Hash of the state injection (for audit)
511    pub state_injection_hash: String,
512    /// The actual prompt/messages to send
513    pub prompt: BackendPrompt,
514    /// Contracts to validate against
515    pub contracts: Vec<ContractSpec>,
516    /// Resource budgets
517    pub budgets: BackendBudgets,
518    /// Recall policy (optional, local-only capability)
519    pub recall_policy: Option<BackendRecallPolicy>,
520    /// Adapter policy (optional, local-only capability)
521    pub adapter_policy: Option<BackendAdapterPolicy>,
522    /// Retry policy (optional, overrides backend default)
523    #[serde(default)]
524    pub retry_policy: Option<RetryPolicy>,
525}
526
527/// The prompt content for the backend.
528#[derive(Debug, Clone, Serialize, Deserialize)]
529pub enum BackendPrompt {
530    /// Simple text prompt
531    Text(String),
532    /// Chat-style messages
533    Messages(Vec<Message>),
534}
535
536/// A chat message.
537#[derive(Debug, Clone, Serialize, Deserialize)]
538pub struct Message {
539    pub role: MessageRole,
540    pub content: String,
541}
542
543impl Message {
544    /// Create a system message.
545    pub fn system(content: impl Into<String>) -> Self {
546        Self {
547            role: MessageRole::System,
548            content: content.into(),
549        }
550    }
551
552    /// Create a user message.
553    pub fn user(content: impl Into<String>) -> Self {
554        Self {
555            role: MessageRole::User,
556            content: content.into(),
557        }
558    }
559
560    /// Create an assistant message.
561    pub fn assistant(content: impl Into<String>) -> Self {
562        Self {
563            role: MessageRole::Assistant,
564            content: content.into(),
565        }
566    }
567}
568
569/// Message role in chat format.
570#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
571pub enum MessageRole {
572    System,
573    User,
574    Assistant,
575}
576
577/// Contract specification for validation.
578#[derive(Debug, Clone, Serialize, Deserialize)]
579pub struct ContractSpec {
580    /// Contract/truth name
581    pub name: String,
582    /// Expected output schema (JSON Schema)
583    pub schema: Option<serde_json::Value>,
584    /// Whether this contract is required to pass
585    pub required: bool,
586}
587
588/// Resource budgets for the invocation.
589#[derive(Debug, Clone, Serialize, Deserialize)]
590pub struct BackendBudgets {
591    /// Maximum tokens to generate
592    pub max_tokens: usize,
593    /// Maximum iterations (for multi-step)
594    pub max_iterations: usize,
595    /// Latency ceiling in milliseconds (0 = no limit)
596    pub latency_ceiling_ms: u64,
597    /// Maximum cost in microdollars (0 = no limit)
598    pub cost_ceiling_microdollars: u64,
599}
600
601impl Default for BackendBudgets {
602    fn default() -> Self {
603        Self {
604            max_tokens: 1024,
605            max_iterations: 1,
606            latency_ceiling_ms: 0,
607            cost_ceiling_microdollars: 0,
608        }
609    }
610}
611
612/// Recall policy for backend requests.
613#[derive(Debug, Clone, Serialize, Deserialize)]
614pub struct BackendRecallPolicy {
615    pub enabled: bool,
616    pub max_candidates: usize,
617    pub min_score: f32,
618    pub corpus_filter: Option<String>,
619}
620
621impl Default for BackendRecallPolicy {
622    fn default() -> Self {
623        Self {
624            enabled: false,
625            max_candidates: 5,
626            min_score: 0.5,
627            corpus_filter: None,
628        }
629    }
630}
631
632/// Adapter policy for backend requests.
633#[derive(Debug, Clone, Serialize, Deserialize)]
634pub struct BackendAdapterPolicy {
635    /// Explicit adapter ID (authority from outside)
636    pub adapter_id: Option<String>,
637    /// Whether adapter is required (fail if not available)
638    pub required: bool,
639}
640
641impl Default for BackendAdapterPolicy {
642    fn default() -> Self {
643        Self {
644            adapter_id: None,
645            required: false,
646        }
647    }
648}
649
650// ============================================================================
651// Backend Response
652// ============================================================================
653
654/// A unified response from any LLM backend.
655///
656/// Both local kernel and remote providers return this same response type.
657#[derive(Debug, Clone, Serialize, Deserialize)]
658pub struct BackendResponse {
659    /// The generated proposals
660    pub proposals: Vec<ProposedContent>,
661    /// Contract validation report
662    pub contract_report: ContractReport,
663    /// Trace link (backend-specific but normalized interface)
664    pub trace_link: TraceLink,
665    /// Resource usage
666    pub usage: BackendUsage,
667}
668
669/// Contract validation report for backend responses.
670#[derive(Debug, Clone, Serialize, Deserialize)]
671pub struct ContractReport {
672    /// Per-contract results
673    pub results: Vec<BackendContractResult>,
674    /// Overall pass/fail
675    pub all_passed: bool,
676}
677
678impl ContractReport {
679    /// Create an empty passing report.
680    pub fn empty_pass() -> Self {
681        Self {
682            results: vec![],
683            all_passed: true,
684        }
685    }
686
687    /// Create a report from results.
688    pub fn from_results(results: Vec<BackendContractResult>) -> Self {
689        let all_passed = results.iter().all(|r| r.passed);
690        Self { results, all_passed }
691    }
692}
693
694/// Result of a single contract check (backend-level).
695#[derive(Debug, Clone, Serialize, Deserialize)]
696pub struct BackendContractResult {
697    pub name: String,
698    pub passed: bool,
699    pub diagnostics: Option<String>,
700}
701
702impl BackendContractResult {
703    /// Create a passing result.
704    pub fn pass(name: impl Into<String>) -> Self {
705        Self {
706            name: name.into(),
707            passed: true,
708            diagnostics: None,
709        }
710    }
711
712    /// Create a failing result with diagnostics.
713    pub fn fail(name: impl Into<String>, diagnostics: impl Into<String>) -> Self {
714        Self {
715            name: name.into(),
716            passed: false,
717            diagnostics: Some(diagnostics.into()),
718        }
719    }
720}
721
722/// Resource usage from the invocation.
723#[derive(Debug, Clone, Serialize, Deserialize, Default)]
724pub struct BackendUsage {
725    pub input_tokens: usize,
726    pub output_tokens: usize,
727    pub total_tokens: usize,
728    pub latency_ms: u64,
729    pub cost_microdollars: Option<u64>,
730}
731
732// ============================================================================
733// LlmBackend Trait
734// ============================================================================
735
736/// The unified backend interface.
737///
738/// Both local kernel (converge-llm) and remote providers (converge-provider)
739/// implement this trait, making "local vs remote" genuinely interchangeable.
740///
741/// # Implementors
742///
743/// - `LlamaEngine` (converge-llm) - Local inference with Burn/llama-burn
744/// - `AnthropicBackend` (converge-provider) - Remote Claude API
745/// - Future: OpenAIBackend, CohereReranker, etc.
746///
747/// # Thread Safety
748///
749/// Backends must be `Send + Sync` to support concurrent request handling.
750///
751/// # Deprecation Notice
752///
753/// This trait is deprecated in favor of the capability boundary traits in
754/// `converge_core::traits`:
755///
756/// - [`ChatBackend`](crate::traits::ChatBackend) - For chat completion (GAT async)
757/// - [`EmbedBackend`](crate::traits::EmbedBackend) - For embedding generation (GAT async)
758/// - [`LlmBackend`](crate::traits::LlmBackend) - Umbrella combining both
759///
760/// The new traits use the GAT async pattern for zero-cost async without
761/// `async_trait`. See `converge-core/BOUNDARY.md` for migration guide.
762#[deprecated(
763    since = "0.2.0",
764    note = "Use converge_core::traits::LlmBackend (GAT async) instead. See BOUNDARY.md for migration."
765)]
766pub trait LlmBackend: Send + Sync {
767    /// Backend name for identification and routing.
768    fn name(&self) -> &str;
769
770    /// Whether this backend supports deterministic replay.
771    ///
772    /// - Local backends with fixed seeds: `true`
773    /// - Remote backends: `false` (model versions can change)
774    fn supports_replay(&self) -> bool;
775
776    /// Execute an LLM request.
777    ///
778    /// This is the core interface. Implementations handle:
779    /// - Prompt formatting
780    /// - Model invocation
781    /// - Contract validation
782    /// - Trace link generation
783    fn execute(&self, request: &BackendRequest) -> BackendResult<BackendResponse>;
784
785    /// Check if this backend supports a specific capability.
786    ///
787    /// Used by routing policies to select appropriate backends.
788    fn supports_capability(&self, capability: BackendCapability) -> bool;
789
790    /// List all capabilities this backend supports.
791    ///
792    /// Default implementation checks each capability individually.
793    fn capabilities(&self) -> Vec<BackendCapability> {
794        let all_caps = [
795            BackendCapability::Replay,
796            BackendCapability::Adapters,
797            BackendCapability::Recall,
798            BackendCapability::StepContracts,
799            BackendCapability::FrontierReasoning,
800            BackendCapability::FastIteration,
801            BackendCapability::Offline,
802            BackendCapability::Streaming,
803            BackendCapability::Vision,
804            BackendCapability::ToolUse,
805        ];
806        all_caps
807            .iter()
808            .filter(|cap| self.supports_capability(**cap))
809            .copied()
810            .collect()
811    }
812}
813
814#[cfg(test)]
815mod tests {
816    use super::*;
817
818    #[test]
819    fn test_backend_budgets_default() {
820        let budgets = BackendBudgets::default();
821        assert_eq!(budgets.max_tokens, 1024);
822        assert_eq!(budgets.max_iterations, 1);
823        assert_eq!(budgets.latency_ceiling_ms, 0);
824        assert_eq!(budgets.cost_ceiling_microdollars, 0);
825    }
826
827    #[test]
828    fn test_message_constructors() {
829        let system = Message::system("You are a helpful assistant");
830        assert_eq!(system.role, MessageRole::System);
831        assert_eq!(system.content, "You are a helpful assistant");
832
833        let user = Message::user("Hello");
834        assert_eq!(user.role, MessageRole::User);
835
836        let assistant = Message::assistant("Hi there!");
837        assert_eq!(assistant.role, MessageRole::Assistant);
838    }
839
840    #[test]
841    fn test_contract_report_from_results() {
842        let results = vec![
843            BackendContractResult::pass("contract1"),
844            BackendContractResult::pass("contract2"),
845        ];
846        let report = ContractReport::from_results(results);
847        assert!(report.all_passed);
848
849        let mixed = vec![
850            BackendContractResult::pass("contract1"),
851            BackendContractResult::fail("contract2", "missing field"),
852        ];
853        let report = ContractReport::from_results(mixed);
854        assert!(!report.all_passed);
855    }
856
857    #[test]
858    fn test_backend_error_display() {
859        let err = BackendError::BudgetExceeded {
860            resource: "tokens".to_string(),
861            limit: "1024".to_string(),
862        };
863        assert!(err.to_string().contains("tokens"));
864        assert!(err.to_string().contains("1024"));
865    }
866
867    #[test]
868    fn test_capability_serialization_stable() {
869        assert_eq!(
870            serde_json::to_string(&BackendCapability::Replay).unwrap(),
871            "\"Replay\""
872        );
873        assert_eq!(
874            serde_json::to_string(&BackendCapability::FrontierReasoning).unwrap(),
875            "\"FrontierReasoning\""
876        );
877    }
878
879    #[test]
880    fn test_message_role_serialization_stable() {
881        assert_eq!(
882            serde_json::to_string(&MessageRole::System).unwrap(),
883            "\"System\""
884        );
885        assert_eq!(
886            serde_json::to_string(&MessageRole::User).unwrap(),
887            "\"User\""
888        );
889        assert_eq!(
890            serde_json::to_string(&MessageRole::Assistant).unwrap(),
891            "\"Assistant\""
892        );
893    }
894
895    // =========================================================================
896    // Retry Policy Tests
897    // =========================================================================
898
899    #[test]
900    fn test_retry_policy_default() {
901        let policy = RetryPolicy::default();
902        assert_eq!(policy.max_attempts, 3);
903        assert_eq!(policy.initial_delay_ms, 100);
904        assert_eq!(policy.backoff, BackoffStrategy::Exponential);
905    }
906
907    #[test]
908    fn test_retry_policy_no_retry() {
909        let policy = RetryPolicy::no_retry();
910        assert_eq!(policy.max_attempts, 1);
911        assert!(!policy.should_retry(1));
912    }
913
914    #[test]
915    fn test_retry_policy_delay_exponential() {
916        let policy = RetryPolicy {
917            max_attempts: 5,
918            initial_delay_ms: 100,
919            max_delay_ms: 10_000,
920            backoff: BackoffStrategy::Exponential,
921            jitter_percent: 0,
922        };
923
924        assert_eq!(policy.delay_for_attempt(1), 100);  // 100 * 2^0
925        assert_eq!(policy.delay_for_attempt(2), 200);  // 100 * 2^1
926        assert_eq!(policy.delay_for_attempt(3), 400);  // 100 * 2^2
927        assert_eq!(policy.delay_for_attempt(4), 800);  // 100 * 2^3
928    }
929
930    #[test]
931    fn test_retry_policy_delay_linear() {
932        let policy = RetryPolicy {
933            max_attempts: 5,
934            initial_delay_ms: 100,
935            max_delay_ms: 10_000,
936            backoff: BackoffStrategy::Linear,
937            jitter_percent: 0,
938        };
939
940        assert_eq!(policy.delay_for_attempt(1), 100);  // 100 * 1
941        assert_eq!(policy.delay_for_attempt(2), 200);  // 100 * 2
942        assert_eq!(policy.delay_for_attempt(3), 300);  // 100 * 3
943    }
944
945    #[test]
946    fn test_retry_policy_delay_fixed() {
947        let policy = RetryPolicy {
948            max_attempts: 5,
949            initial_delay_ms: 100,
950            max_delay_ms: 10_000,
951            backoff: BackoffStrategy::Fixed,
952            jitter_percent: 0,
953        };
954
955        assert_eq!(policy.delay_for_attempt(1), 100);
956        assert_eq!(policy.delay_for_attempt(2), 100);
957        assert_eq!(policy.delay_for_attempt(3), 100);
958    }
959
960    #[test]
961    fn test_retry_policy_max_delay_cap() {
962        let policy = RetryPolicy {
963            max_attempts: 20,
964            initial_delay_ms: 1000,
965            max_delay_ms: 5000,
966            backoff: BackoffStrategy::Exponential,
967            jitter_percent: 0,
968        };
969
970        // Exponential would be 1000 * 2^9 = 512000, but capped at 5000
971        assert_eq!(policy.delay_for_attempt(10), 5000);
972    }
973
974    #[test]
975    fn test_retry_policy_should_retry() {
976        let policy = RetryPolicy {
977            max_attempts: 3,
978            ..Default::default()
979        };
980
981        assert!(policy.should_retry(1));
982        assert!(policy.should_retry(2));
983        assert!(!policy.should_retry(3));
984        assert!(!policy.should_retry(4));
985    }
986
987    // =========================================================================
988    // Circuit Breaker Tests
989    // =========================================================================
990
991    #[test]
992    fn test_circuit_breaker_config_default() {
993        let config = CircuitBreakerConfig::default();
994        assert_eq!(config.failure_threshold, 5);
995        assert_eq!(config.success_threshold, 2);
996        assert_eq!(config.timeout_ms, 30_000);
997    }
998
999    #[test]
1000    fn test_circuit_breaker_config_sensitive() {
1001        let config = CircuitBreakerConfig::sensitive();
1002        assert_eq!(config.failure_threshold, 3);
1003        assert!(config.failure_threshold < CircuitBreakerConfig::default().failure_threshold);
1004    }
1005
1006    #[test]
1007    fn test_circuit_breaker_config_tolerant() {
1008        let config = CircuitBreakerConfig::tolerant();
1009        assert_eq!(config.failure_threshold, 10);
1010        assert!(config.failure_threshold > CircuitBreakerConfig::default().failure_threshold);
1011    }
1012
1013    #[test]
1014    fn test_circuit_state_default() {
1015        let state = CircuitState::default();
1016        assert_eq!(state, CircuitState::Closed);
1017    }
1018
1019    // =========================================================================
1020    // Backend Error Retryable Tests
1021    // =========================================================================
1022
1023    #[test]
1024    fn test_timeout_is_retryable() {
1025        let err = BackendError::Timeout {
1026            deadline_ms: 5000,
1027            elapsed_ms: 5001,
1028        };
1029        assert!(err.is_retryable());
1030        assert!(err.is_overload());
1031    }
1032
1033    #[test]
1034    fn test_unavailable_is_retryable() {
1035        let err = BackendError::Unavailable {
1036            message: "Service temporarily unavailable".to_string(),
1037        };
1038        assert!(err.is_retryable());
1039        assert!(err.is_overload());
1040    }
1041
1042    #[test]
1043    fn test_rate_limit_is_retryable() {
1044        let err = BackendError::ExecutionFailed {
1045            message: "Rate limit exceeded (429)".to_string(),
1046        };
1047        assert!(err.is_retryable());
1048        assert!(err.is_overload());
1049    }
1050
1051    #[test]
1052    fn test_invalid_request_not_retryable() {
1053        let err = BackendError::InvalidRequest {
1054            message: "Missing required field".to_string(),
1055        };
1056        assert!(!err.is_retryable());
1057        assert!(!err.is_overload());
1058    }
1059
1060    #[test]
1061    fn test_budget_exceeded_not_retryable() {
1062        let err = BackendError::BudgetExceeded {
1063            resource: "tokens".to_string(),
1064            limit: "1024".to_string(),
1065        };
1066        assert!(!err.is_retryable());
1067        assert!(!err.is_overload());
1068    }
1069
1070    #[test]
1071    fn test_circuit_open_not_retryable() {
1072        let err = BackendError::CircuitOpen {
1073            backend: "anthropic".to_string(),
1074            retry_after_ms: Some(30_000),
1075        };
1076        assert!(!err.is_retryable());
1077        assert!(!err.is_overload());
1078    }
1079
1080    #[test]
1081    fn test_timeout_error_display() {
1082        let err = BackendError::Timeout {
1083            deadline_ms: 5000,
1084            elapsed_ms: 6000,
1085        };
1086        let msg = err.to_string();
1087        assert!(msg.contains("6000"));
1088        assert!(msg.contains("5000"));
1089    }
1090
1091    #[test]
1092    fn test_circuit_open_error_display() {
1093        let err = BackendError::CircuitOpen {
1094            backend: "test-backend".to_string(),
1095            retry_after_ms: Some(30_000),
1096        };
1097        let msg = err.to_string();
1098        assert!(msg.contains("test-backend"));
1099        assert!(msg.contains("30000"));
1100    }
1101
1102    #[test]
1103    fn test_retried_error_display() {
1104        let err = BackendError::Retried {
1105            message: "Final error".to_string(),
1106            attempts: 3,
1107            was_transient: true,
1108        };
1109        let msg = err.to_string();
1110        assert!(msg.contains("3 attempts"));
1111        assert!(msg.contains("transient: true"));
1112    }
1113
1114    // =========================================================================
1115    // Serialization Stability Tests
1116    // =========================================================================
1117
1118    #[test]
1119    fn test_retry_policy_serialization_stable() {
1120        let policy = RetryPolicy::default();
1121        let json = serde_json::to_string(&policy).unwrap();
1122        assert!(json.contains("\"max_attempts\":3"));
1123        assert!(json.contains("\"Exponential\""));
1124
1125        // Round-trip
1126        let parsed: RetryPolicy = serde_json::from_str(&json).unwrap();
1127        assert_eq!(parsed, policy);
1128    }
1129
1130    #[test]
1131    fn test_circuit_breaker_config_serialization_stable() {
1132        let config = CircuitBreakerConfig::default();
1133        let json = serde_json::to_string(&config).unwrap();
1134        assert!(json.contains("\"failure_threshold\":5"));
1135
1136        // Round-trip
1137        let parsed: CircuitBreakerConfig = serde_json::from_str(&json).unwrap();
1138        assert_eq!(parsed, config);
1139    }
1140}