Skip to main content

imp_llm/
provider.rs

1use std::pin::Pin;
2use std::time::Duration;
3
4use async_trait::async_trait;
5use futures_core::Stream;
6use serde::{Deserialize, Serialize};
7
8use crate::auth::{ApiKey, AuthStore};
9use crate::error::Result;
10use crate::message::Message;
11use crate::model::{Model, ModelMeta};
12use crate::stream::StreamEvent;
13
14/// A provider handles communication with a specific LLM API.
15///
16/// Each provider (Anthropic, OpenAI, Google, etc.) implements this trait
17/// to normalize streaming responses into [`StreamEvent`]s.
18#[async_trait]
19pub trait Provider: Send + Sync {
20    /// Stream a completion response.
21    fn stream(
22        &self,
23        model: &Model,
24        context: Context,
25        options: RequestOptions,
26        api_key: &str,
27    ) -> Pin<Box<dyn Stream<Item = Result<StreamEvent>> + Send>>;
28
29    /// Resolve an API key for this provider.
30    async fn resolve_auth(&self, auth: &AuthStore) -> Result<ApiKey>;
31
32    /// Provider identifier (e.g., "anthropic", "openai", "google").
33    fn id(&self) -> &str;
34
35    /// List available models for this provider.
36    fn models(&self) -> &[ModelMeta];
37
38    /// Transport capabilities exposed to the runtime. Providers should only
39    /// report features implemented by this crate, not features merely present
40    /// in the upstream vendor API.
41    fn transport_capabilities(&self) -> TransportCapabilities {
42        TransportCapabilities::default()
43    }
44}
45
46/// Provider transport features visible to the agent runtime.
47///
48/// This is intentionally provider-neutral: specific APIs may call these
49/// concepts response IDs, sessions, conversations, or channels, but the agent
50/// should branch on durable behavior rather than vendor names.
51#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
52pub struct TransportCapabilities {
53    pub request_response: bool,
54    pub streaming: bool,
55    pub continuation: ContinuationMode,
56    pub persistent_session: PersistentSessionMode,
57    pub cancellation: CancellationMode,
58    pub resumability: ResumabilityMode,
59}
60
61impl TransportCapabilities {
62    pub const fn stateless_streaming_http() -> Self {
63        Self {
64            request_response: true,
65            streaming: true,
66            continuation: ContinuationMode::None,
67            persistent_session: PersistentSessionMode::None,
68            cancellation: CancellationMode::DropLocalStream,
69            resumability: ResumabilityMode::RestartRequest,
70        }
71    }
72}
73
74impl Default for TransportCapabilities {
75    fn default() -> Self {
76        Self::stateless_streaming_http()
77    }
78}
79
80#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
81#[serde(rename_all = "snake_case")]
82pub enum ContinuationMode {
83    None,
84    ProviderManagedId,
85}
86
87#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
88#[serde(rename_all = "snake_case")]
89pub enum PersistentSessionMode {
90    None,
91    WebSocket,
92}
93
94#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
95#[serde(rename_all = "snake_case")]
96pub enum CancellationMode {
97    DropLocalStream,
98    ProviderAbort,
99}
100
101#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
102#[serde(rename_all = "snake_case")]
103pub enum ResumabilityMode {
104    RestartRequest,
105    ResumeProviderState,
106}
107
108/// Conversation context sent to the provider.
109#[derive(Debug, Clone, Default)]
110pub struct Context {
111    pub messages: Vec<Message>,
112}
113
114/// Tuning knobs for a single LLM request.
115#[derive(Debug, Clone)]
116pub struct RequestOptions {
117    pub thinking_level: ThinkingLevel,
118    pub max_tokens: Option<u32>,
119    pub temperature: Option<f32>,
120    pub system_prompt: String,
121    pub tools: Vec<ToolDefinition>,
122    pub cache_options: CacheOptions,
123    /// Effort level for the model (Anthropic-specific).
124    pub effort: Option<EffortLevel>,
125}
126
127impl Default for RequestOptions {
128    fn default() -> Self {
129        Self {
130            thinking_level: ThinkingLevel::Off,
131            max_tokens: None,
132            temperature: None,
133            system_prompt: String::new(),
134            tools: Vec::new(),
135            cache_options: CacheOptions::default(),
136            effort: None,
137        }
138    }
139}
140
141/// How much effort the model should expend on the task.
142/// Separate from thinking — controls overall thoroughness.
143/// Only supported by Anthropic models with the effort beta.
144#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
145#[serde(rename_all = "lowercase")]
146pub enum EffortLevel {
147    Low,
148    Medium,
149    High,
150}
151
152/// How much reasoning/thinking to request from the model.
153#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
154#[serde(rename_all = "lowercase")]
155pub enum ThinkingLevel {
156    /// No extended thinking.
157    #[default]
158    Off,
159    /// Minimal reasoning.
160    Minimal,
161    /// Low-effort reasoning.
162    Low,
163    /// Moderate reasoning.
164    Medium,
165    /// High-effort reasoning.
166    High,
167    /// Maximum reasoning budget.
168    XHigh,
169}
170
/// Selects which parts of a request may take part in prompt caching.
///
/// The derived default turns every flag off and caches zero recent turns.
#[derive(Debug, Clone, Default)]
pub struct CacheOptions {
    /// Reuse the cached system prompt across requests.
    pub cache_system_prompt: bool,
    /// Make tool definitions cacheable.
    pub cache_tools: bool,
    /// How many of the most recent conversation turns to cache.
    pub cache_recent_turns: usize,
    /// Request the 1-hour TTL rather than the default 5-minute one.
    pub extended_ttl: bool,
    /// Cache in global scope (shared across users with identical prompts).
    pub global_scope: bool,
}
185
186/// A tool the model may call, defined by a JSON Schema for its parameters.
187#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct ToolDefinition {
189    pub name: String,
190    pub description: String,
191    pub parameters: serde_json::Value,
192}
193
194/// Retry policy for transient failures (rate limits, server errors, timeouts).
195#[derive(Debug, Clone)]
196pub struct RetryPolicy {
197    pub max_retries: u32,
198    pub base_delay: Duration,
199    pub max_delay: Duration,
200    pub retry_on: Vec<RetryCondition>,
201}
202
203impl Default for RetryPolicy {
204    fn default() -> Self {
205        Self {
206            max_retries: 3,
207            base_delay: Duration::from_secs(1),
208            max_delay: Duration::from_secs(30),
209            retry_on: vec![
210                RetryCondition::RateLimit,
211                RetryCondition::ServerError,
212                RetryCondition::Timeout,
213                RetryCondition::ConnectionError,
214            ],
215        }
216    }
217}
218
/// Kinds of failure for which a request may be retried.
///
/// Derives `Copy` like the other fieldless enums in this module, so values
/// can be passed and compared without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RetryCondition {
    /// The request was rate limited.
    RateLimit,
    /// The server reported an error.
    ServerError,
    /// The request timed out.
    Timeout,
    /// The connection failed.
    ConnectionError,
}
227
#[cfg(test)]
mod transport_capability_tests {
    use super::*;

    /// The default capability set must match the conservative stateless
    /// streaming-HTTP profile, field by field.
    #[test]
    fn default_transport_capabilities_are_conservative_streaming_http() {
        // Exhaustive destructuring: adding a field to the struct forces this
        // test to be revisited.
        let TransportCapabilities {
            request_response,
            streaming,
            continuation,
            persistent_session,
            cancellation,
            resumability,
        } = TransportCapabilities::default();

        assert!(request_response);
        assert!(streaming);
        assert_eq!(continuation, ContinuationMode::None);
        assert_eq!(persistent_session, PersistentSessionMode::None);
        assert_eq!(cancellation, CancellationMode::DropLocalStream);
        assert_eq!(resumability, ResumabilityMode::RestartRequest);
    }
}
243}