Skip to main content

heartbit_core/config/
provider.rs

1#![allow(missing_docs)]
2use serde::Deserialize;
3use std::time::Duration;
4
5/// LLM provider configuration.
6///
7/// When running as a cloud-delegated runtime (daemon mode with no agents),
8/// the provider section can be omitted — per-request provider keys are used instead.
9#[derive(Debug, Default, Deserialize)]
10pub struct ProviderConfig {
11    #[serde(default)]
12    pub name: String,
13    #[serde(default)]
14    pub model: String,
15    /// Custom API endpoint URL (overrides the default for the provider).
16    /// Useful for self-hosted models, Azure, or proxies.
17    #[serde(default)]
18    pub base_url: Option<String>,
19    /// Direct API key (alternative to environment variable).
20    /// Prefer env vars in production; this is for testing/local dev.
21    #[serde(default)]
22    pub api_key: Option<String>,
23    /// Retry configuration for transient LLM API failures.
24    pub retry: Option<RetryProviderConfig>,
25    /// Enable Anthropic prompt caching (system prompt + tool definitions).
26    /// Only effective for the `anthropic` provider. Defaults to `false`.
27    #[serde(default)]
28    pub prompt_caching: bool,
29    /// Model cascading configuration. When enabled, tries cheaper models first
30    /// and escalates to the main model only when the confidence gate rejects.
31    pub cascade: Option<CascadeConfig>,
32    /// Circuit breaker configuration for this provider.
33    /// When absent, sensible defaults are used (5 failures → 30 s open, max 300 s).
34    #[serde(default)]
35    pub circuit: ProviderCircuitConfig,
36}
37
38/// Model cascading configuration for cost-efficient LLM selection.
39///
40/// When enabled, the provider tries cheaper model tiers first and only
41/// escalates to the main (most expensive) model when the confidence gate
42/// rejects the cheaper response or the tier errors.
43#[derive(Debug, Clone, Deserialize)]
44pub struct CascadeConfig {
45    /// Enable model cascading. Default: false.
46    #[serde(default)]
47    pub enabled: bool,
48    /// Model tiers from cheapest to most expensive.
49    /// The main `[provider].model` is always the implicit final tier.
50    #[serde(default)]
51    pub tiers: Vec<CascadeTierConfig>,
52    /// Confidence gate configuration. Default: heuristic with sensible defaults.
53    #[serde(default)]
54    pub gate: CascadeGateConfig,
55}
56
57/// A single tier in the model cascade.
58#[derive(Debug, Clone, Deserialize)]
59pub struct CascadeTierConfig {
60    pub model: String,
61}
62
63/// Confidence gate configuration for model cascading.
64#[derive(Debug, Clone, Deserialize)]
65#[serde(tag = "type", rename_all = "snake_case")]
66pub enum CascadeGateConfig {
67    /// Heuristic gate: zero-cost checks on response length, refusal patterns, etc.
68    Heuristic {
69        /// Minimum output tokens for acceptance (default: 5).
70        #[serde(default = "default_min_output_tokens")]
71        min_output_tokens: u32,
72        /// Accept responses that include tool calls (default: true).
73        #[serde(default = "super::default_true")]
74        accept_tool_calls: bool,
75        /// Escalate on MaxTokens stop reason (default: true).
76        #[serde(default = "super::default_true")]
77        escalate_on_max_tokens: bool,
78    },
79}
80
81impl Default for CascadeGateConfig {
82    fn default() -> Self {
83        Self::Heuristic {
84            min_output_tokens: default_min_output_tokens(),
85            accept_tool_calls: true,
86            escalate_on_max_tokens: true,
87        }
88    }
89}
90
/// Serde default for the heuristic gate's `min_output_tokens` field.
fn default_min_output_tokens() -> u32 {
    const MIN_OUTPUT_TOKENS: u32 = 5;
    MIN_OUTPUT_TOKENS
}
94
95/// Circuit breaker configuration for the LLM provider.
96///
97/// Controls how quickly the circuit opens on consecutive failures and how long
98/// it stays open before allowing a probe request through. All fields are optional;
99/// absent fields fall back to [`crate::llm::circuit::CircuitConfig`] defaults.
100#[derive(Debug, Clone, Default, serde::Serialize, Deserialize)]
101#[serde(deny_unknown_fields)]
102pub struct ProviderCircuitConfig {
103    /// Number of consecutive failures before the circuit opens. Must be > 0.
104    #[serde(default, skip_serializing_if = "Option::is_none")]
105    pub failure_threshold: Option<u32>,
106    /// Initial duration in seconds the circuit stays open after tripping. Must be > 0.
107    #[serde(default, skip_serializing_if = "Option::is_none")]
108    pub initial_open_duration_seconds: Option<u32>,
109    /// Maximum backoff duration in seconds before a half-open probe. Must be > 0.
110    #[serde(default, skip_serializing_if = "Option::is_none")]
111    pub max_open_duration_seconds: Option<u32>,
112    /// Backoff multiplier applied after each re-trip (exponential backoff).
113    #[serde(default, skip_serializing_if = "Option::is_none")]
114    pub backoff_multiplier: Option<f64>,
115}
116
117impl From<&ProviderCircuitConfig> for crate::llm::circuit::CircuitConfig {
118    fn from(c: &ProviderCircuitConfig) -> Self {
119        let default = crate::llm::circuit::CircuitConfig::default();
120        Self {
121            failure_threshold: c.failure_threshold.unwrap_or(default.failure_threshold),
122            initial_open_duration: c
123                .initial_open_duration_seconds
124                .map(|s| std::time::Duration::from_secs(u64::from(s)))
125                .unwrap_or(default.initial_open_duration),
126            max_open_duration: c
127                .max_open_duration_seconds
128                .map(|s| std::time::Duration::from_secs(u64::from(s)))
129                .unwrap_or(default.max_open_duration),
130            backoff_multiplier: c.backoff_multiplier.unwrap_or(default.backoff_multiplier),
131        }
132    }
133}
134
135/// Retry configuration for transient LLM API failures (429, 500, 502, 503, 529).
136#[derive(Debug, Deserialize)]
137pub struct RetryProviderConfig {
138    /// Maximum retry attempts (default: 3).
139    #[serde(default = "default_max_retries")]
140    pub max_retries: u32,
141    /// Base delay in milliseconds for exponential backoff (default: 500).
142    #[serde(default = "default_base_delay_ms")]
143    pub base_delay_ms: u64,
144    /// Maximum delay cap in milliseconds (default: 30000).
145    #[serde(default = "default_max_delay_ms")]
146    pub max_delay_ms: u64,
147}
148
/// Serde default for `RetryProviderConfig::max_retries`.
fn default_max_retries() -> u32 {
    const MAX_RETRIES: u32 = 3;
    MAX_RETRIES
}
152
/// Serde default for `RetryProviderConfig::base_delay_ms`.
fn default_base_delay_ms() -> u64 {
    const BASE_DELAY_MS: u64 = 500;
    BASE_DELAY_MS
}
156
/// Serde default for `RetryProviderConfig::max_delay_ms`.
fn default_max_delay_ms() -> u64 {
    const MAX_DELAY_MS: u64 = 30_000;
    MAX_DELAY_MS
}
160
161impl From<&RetryProviderConfig> for crate::llm::retry::RetryConfig {
162    fn from(r: &RetryProviderConfig) -> Self {
163        Self {
164            max_retries: r.max_retries,
165            base_delay: Duration::from_millis(r.base_delay_ms),
166            max_delay: Duration::from_millis(r.max_delay_ms),
167        }
168    }
169}