limit_llm/providers.rs
1//! Multi-provider LLM support.
2//!
3//! This module provides the [`LlmProvider`] trait that all LLM providers implement,
4//! enabling a unified interface for working with different LLM backends.
5//!
6//! # Available Providers
7//!
8//! | Provider | Type | Features |
9//! |----------|------|----------|
10//! | Anthropic Claude | [`AnthropicClient`] | Streaming, Tools, Thinking |
11//! | OpenAI GPT | [`OpenAiProvider`] | Streaming, Tools |
12//! | z.ai GLM | [`ZaiProvider`] | Streaming, Tools, Thinking |
13//! | Local/Ollama | [`LocalProvider`] | Streaming |
14//!
15//! # Using the Provider Trait
16//!
17//! ```rust,no_run
18//! use limit_llm::{LlmProvider, Message, Tool};
19//! use futures::StreamExt;
20//!
21//! async fn complete_with_provider(
22//! provider: Box<dyn LlmProvider>,
23//! messages: Vec<Message>,
24//! ) -> Result<String, limit_llm::LlmError> {
25//! let mut stream = provider.send(messages, vec![]).await?;
26//! let mut result = String::new();
27//!
28//! while let Some(chunk) = stream.next().await {
29//! match chunk {
30//! Ok(limit_llm::ProviderResponseChunk::ContentDelta(text)) => {
31//! result.push_str(&text);
32//! }
33//! Ok(limit_llm::ProviderResponseChunk::Done(usage)) => {
34//! eprintln!("Tokens: {} in, {} out",
35//! usage.input_tokens, usage.output_tokens);
36//! }
37//! Err(e) => return Err(e),
38//! _ => {}
39//! }
40//! }
41//!
42//! Ok(result)
43//! }
44//! ```
45//!
46//! # Response Chunks
47//!
48//! Providers return a stream of [`ProviderResponseChunk`] variants:
49//!
50//! - [`ContentDelta`](ProviderResponseChunk::ContentDelta) — Incremental text content
51//! - [`ReasoningDelta`](ProviderResponseChunk::ReasoningDelta) — Thinking/reasoning content
52//! - [`ToolCallDelta`](ProviderResponseChunk::ToolCallDelta) — Tool call with arguments
53//! - [`Done`](ProviderResponseChunk::Done) — Completion with token usage
54
55use async_trait::async_trait;
56use futures::Stream;
57use std::pin::Pin;
58
59use crate::error::LlmError;
60use crate::types::{Message, Tool, Usage};
61
62/// A chunk of response data from an LLM provider.
63///
64/// Providers stream responses as a sequence of chunks, allowing for
65/// real-time display of generated content.
66#[derive(Debug, Clone)]
67pub enum ProviderResponseChunk {
68 /// Incremental text content from the assistant.
69 ///
70 /// Concatenate these deltas to build the complete response.
71 ContentDelta(String),
72
73 /// Incremental reasoning/thinking content.
74 ///
75 /// Some providers (Claude, z.ai) support extended thinking mode
76 /// where the model shows its reasoning process.
77 ReasoningDelta(String),
78
79 /// A tool call with accumulated arguments.
80 ///
81 /// The `arguments` field contains the JSON arguments parsed so far.
82 /// For streaming tool calls, arguments may be partially complete.
83 ToolCallDelta {
84 /// Unique identifier for this tool call.
85 id: String,
86
87 /// Name of the tool to call.
88 name: String,
89
90 /// JSON arguments (may be partial during streaming).
91 arguments: serde_json::Value,
92 },
93
94 /// Response completion with token usage statistics.
95 ///
96 /// This is always the final chunk in the stream.
97 Done(Usage),
98}
99
100/// Common trait for all LLM providers.
101///
102/// This trait provides a unified interface for sending messages to different
103/// LLM backends. All providers support streaming responses and optional tool calling.
104///
105/// # Implementation
106///
107/// ```rust,ignore
108/// use limit_llm::{LlmProvider, Message, Tool, ProviderResponseChunk, LlmError};
109/// use async_trait::async_trait;
110/// use futures::Stream;
111/// use std::pin::Pin;
112///
113/// struct MyProvider {
114/// // provider-specific fields
115/// }
116///
117/// #[async_trait]
118/// impl LlmProvider for MyProvider {
119/// async fn send(
120/// &self,
121/// messages: Vec<Message>,
122/// tools: Vec<Tool>,
123/// ) -> Result<
124/// Pin<Box<dyn Stream<Item = Result<ProviderResponseChunk, LlmError>> + Send + '_>>,
125/// LlmError,
126/// > {
127/// // Implementation that streams response chunks
128/// }
129///
130/// fn provider_name(&self) -> &str { "my_provider" }
131/// fn model_name(&self) -> &str { "my-model" }
132/// fn clone_box(&self) -> Box<dyn LlmProvider> {
133/// Box::new(self.clone())
134/// }
135/// }
136/// ```
137#[async_trait]
138pub trait LlmProvider: Send + Sync {
139 /// Send messages to the LLM and receive a streaming response.
140 ///
141 /// # Arguments
142 ///
143 /// * `messages` — The conversation history as a vector of messages
144 /// * `tools` — Optional tool definitions for function calling
145 ///
146 /// # Returns
147 ///
148 /// A stream of response chunks. The stream will always end with a
149 /// [`Done`](ProviderResponseChunk::Done) chunk on success.
150 ///
151 /// # Errors
152 ///
153 /// Returns [`LlmError`] if the request fails before streaming begins.
154 #[allow(clippy::type_complexity)]
155 async fn send(
156 &self,
157 messages: Vec<Message>,
158 tools: Vec<Tool>,
159 ) -> Result<
160 Pin<Box<dyn Stream<Item = Result<ProviderResponseChunk, LlmError>> + Send + '_>>,
161 LlmError,
162 >;
163
164 /// Returns the provider name (e.g., "anthropic", "openai").
165 fn provider_name(&self) -> &str;
166
167 /// Returns the model name (e.g., "claude-3-5-sonnet-20241022").
168 fn model_name(&self) -> &str;
169
170 /// Clones the provider into a boxed trait object.
171 ///
172 /// This enables cloning of `Box<dyn LlmProvider>`.
173 fn clone_box(&self) -> Box<dyn LlmProvider>;
174}
175
176/// Implement Clone for `Box<dyn LlmProvider>`.
177impl Clone for Box<dyn LlmProvider> {
178 fn clone(&self) -> Self {
179 self.clone_box()
180 }
181}
182
183/// Provider configuration variants for TOML deserialization.
184///
185/// This enum is used in the main [`Config`](crate::Config) to specify
186/// which provider to use and its settings.
187#[derive(Debug, Clone, serde::Deserialize)]
188#[serde(tag = "provider", rename_all = "lowercase")]
189pub enum ProviderConfig {
190 /// Anthropic Claude configuration.
191 Anthropic(AnthropicConfig),
192
193 /// OpenAI GPT configuration.
194 OpenAI(OpenAIConfig),
195
196 /// Unknown/unsupported provider.
197 #[serde(other)]
198 Unknown,
199}
200
201/// Anthropic-specific configuration.
202///
203/// # Example TOML
204///
205/// ```toml
206/// provider = "anthropic"
207///
208/// [providers.anthropic]
209/// api_key = "sk-ant-api03-..."
210/// model = "claude-sonnet-4-6-20260217"
211/// max_tokens = 4096
212/// timeout = 60
213/// ```
214#[derive(Debug, Clone, serde::Deserialize)]
215pub struct AnthropicConfig {
216 /// API key (falls back to `ANTHROPIC_API_KEY` env var).
217 pub api_key: Option<String>,
218
219 /// Model to use (default: "claude-3-5-sonnet-20241022").
220 #[serde(default = "default_anthropic_model")]
221 pub model: String,
222
223 /// Maximum tokens in the response (default: 4096).
224 #[serde(default = "default_max_tokens")]
225 pub max_tokens: u32,
226
227 /// Request timeout in seconds (default: 60).
228 #[serde(default = "default_timeout")]
229 pub timeout: u64,
230
231 /// Custom API endpoint (optional).
232 #[serde(default)]
233 pub base_url: Option<String>,
234}
235
236/// OpenAI-specific configuration.
237///
238/// # Example TOML
239///
240/// ```toml
241/// provider = "openai"
242///
243/// [providers.openai]
244/// api_key = "sk-..."
245/// model = "gpt-5.4"
246/// max_tokens = 4096
247/// timeout = 60
248/// base_url = "http://localhost:8080/v1/chat/completions"
249/// ```
250#[derive(Debug, Clone, serde::Deserialize)]
251pub struct OpenAIConfig {
252 /// API key (falls back to `OPENAI_API_KEY` env var).
253 pub api_key: Option<String>,
254
255 /// Model to use (default: "gpt-4").
256 #[serde(default = "default_openai_model")]
257 pub model: String,
258
259 /// Maximum tokens in the response (default: 4096).
260 #[serde(default = "default_max_tokens")]
261 pub max_tokens: u32,
262
263 /// Request timeout in seconds (default: 60).
264 #[serde(default = "default_timeout")]
265 pub timeout: u64,
266
267 /// Custom API endpoint (optional).
268 /// Note: Must include full path (e.g., `/v1/chat/completions`).
269 #[serde(default)]
270 pub base_url: Option<String>,
271}
272
/// Serde fallback for `AnthropicConfig::model` when the key is absent.
fn default_anthropic_model() -> String {
    String::from("claude-3-5-sonnet-20241022")
}
276
/// Serde fallback for `OpenAIConfig::model` when the key is absent.
fn default_openai_model() -> String {
    String::from("gpt-4")
}
280
/// Serde fallback for the `max_tokens` field of the provider configs.
fn default_max_tokens() -> u32 {
    const DEFAULT_MAX_TOKENS: u32 = 4096;
    DEFAULT_MAX_TOKENS
}
284
/// Serde fallback for the `timeout` field of the provider configs, in seconds.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 60;
    DEFAULT_TIMEOUT_SECS
}