Skip to main content

imp_llm/
provider.rs

1use std::pin::Pin;
2use std::time::Duration;
3
4use async_trait::async_trait;
5use futures_core::Stream;
6use serde::{Deserialize, Serialize};
7
8use crate::auth::{ApiKey, AuthStore};
9use crate::error::Result;
10use crate::message::Message;
11use crate::model::{Model, ModelMeta};
12use crate::stream::StreamEvent;
13
14/// A provider handles communication with a specific LLM API.
15///
16/// Each provider (Anthropic, OpenAI, Google, etc.) implements this trait
17/// to normalize streaming responses into [`StreamEvent`]s.
18#[async_trait]
19pub trait Provider: Send + Sync {
20    /// Stream a completion response.
21    fn stream(
22        &self,
23        model: &Model,
24        context: Context,
25        options: RequestOptions,
26        api_key: &str,
27    ) -> Pin<Box<dyn Stream<Item = Result<StreamEvent>> + Send>>;
28
29    /// Resolve an API key for this provider.
30    async fn resolve_auth(&self, auth: &AuthStore) -> Result<ApiKey>;
31
32    /// Provider identifier (e.g., "anthropic", "openai", "google").
33    fn id(&self) -> &str;
34
35    /// List available models for this provider.
36    fn models(&self) -> &[ModelMeta];
37}
38
39/// Conversation context sent to the provider.
40#[derive(Debug, Clone, Default)]
41pub struct Context {
42    pub messages: Vec<Message>,
43}
44
45/// Tuning knobs for a single LLM request.
46#[derive(Debug, Clone)]
47pub struct RequestOptions {
48    pub thinking_level: ThinkingLevel,
49    pub max_tokens: Option<u32>,
50    pub temperature: Option<f32>,
51    pub system_prompt: String,
52    pub tools: Vec<ToolDefinition>,
53    pub cache_options: CacheOptions,
54    /// Effort level for the model (Anthropic-specific).
55    pub effort: Option<EffortLevel>,
56}
57
58impl Default for RequestOptions {
59    fn default() -> Self {
60        Self {
61            thinking_level: ThinkingLevel::Off,
62            max_tokens: None,
63            temperature: None,
64            system_prompt: String::new(),
65            tools: Vec::new(),
66            cache_options: CacheOptions::default(),
67            effort: None,
68        }
69    }
70}
71
72/// How much effort the model should expend on the task.
73/// Separate from thinking — controls overall thoroughness.
74/// Only supported by Anthropic models with the effort beta.
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
76#[serde(rename_all = "lowercase")]
77pub enum EffortLevel {
78    Low,
79    Medium,
80    High,
81}
82
83/// How much reasoning/thinking to request from the model.
84#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
85#[serde(rename_all = "lowercase")]
86pub enum ThinkingLevel {
87    /// No extended thinking.
88    #[default]
89    Off,
90    /// Minimal reasoning.
91    Minimal,
92    /// Low-effort reasoning.
93    Low,
94    /// Moderate reasoning.
95    Medium,
96    /// High-effort reasoning.
97    High,
98    /// Maximum reasoning budget.
99    XHigh,
100}
101
/// Selects which parts of a request may use prompt caching.
///
/// The derived `Default` turns every flag off and sets
/// `cache_recent_turns` to `0`.
#[derive(Clone, Debug, Default)]
pub struct CacheOptions {
    /// Whether the system prompt is cached across requests.
    pub cache_system_prompt: bool,
    /// Whether tool definitions are cached.
    pub cache_tools: bool,
    /// How many of the most recent conversation turns to cache.
    pub cache_recent_turns: usize,
    /// Request the 1-hour TTL rather than the default 5-minute one.
    pub extended_ttl: bool,
    /// Request global scope (shared across users whose prompts are identical).
    pub global_scope: bool,
}
116
117/// A tool the model may call, defined by a JSON Schema for its parameters.
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct ToolDefinition {
120    pub name: String,
121    pub description: String,
122    pub parameters: serde_json::Value,
123}
124
125/// Retry policy for transient failures (rate limits, server errors, timeouts).
126#[derive(Debug, Clone)]
127pub struct RetryPolicy {
128    pub max_retries: u32,
129    pub base_delay: Duration,
130    pub max_delay: Duration,
131    pub retry_on: Vec<RetryCondition>,
132}
133
134impl Default for RetryPolicy {
135    fn default() -> Self {
136        Self {
137            max_retries: 3,
138            base_delay: Duration::from_secs(1),
139            max_delay: Duration::from_secs(30),
140            retry_on: vec![
141                RetryCondition::RateLimit,
142                RetryCondition::ServerError,
143                RetryCondition::Timeout,
144                RetryCondition::ConnectionError,
145            ],
146        }
147    }
148}
149
/// Conditions under which a request should be retried.
///
/// This is a fieldless enum, so it derives `Copy` (matching the other
/// fieldless enums in this file) and `Hash` (so values can be used as set
/// members or map keys).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RetryCondition {
    /// The request was rate limited.
    RateLimit,
    /// The server reported an error.
    ServerError,
    /// The request timed out.
    Timeout,
    /// The connection failed.
    ConnectionError,
}