// limit_llm/providers.rs

//! Multi-provider LLM support.
//!
//! This module provides the [`LlmProvider`] trait that all LLM providers implement,
//! enabling a unified interface for working with different LLM backends.
//!
//! # Available Providers
//!
//! | Provider | Type | Features |
//! |----------|------|----------|
//! | Anthropic Claude | [`AnthropicClient`] | Streaming, Tools, Thinking |
//! | OpenAI GPT | [`OpenAiProvider`] | Streaming, Tools |
//! | z.ai GLM | [`ZaiProvider`] | Streaming, Tools, Thinking |
//! | Local/Ollama | [`LocalProvider`] | Streaming |
//!
//! # Using the Provider Trait
//!
//! ```rust,no_run
//! use limit_llm::{LlmProvider, Message, Tool};
//! use futures::StreamExt;
//!
//! async fn complete_with_provider(
//!     provider: Box<dyn LlmProvider>,
//!     messages: Vec<Message>,
//! ) -> Result<String, limit_llm::LlmError> {
//!     let mut stream = provider.send(messages, vec![]).await?;
//!     let mut result = String::new();
//!
//!     while let Some(chunk) = stream.next().await {
//!         match chunk {
//!             Ok(limit_llm::ProviderResponseChunk::ContentDelta(text)) => {
//!                 result.push_str(&text);
//!             }
//!             Ok(limit_llm::ProviderResponseChunk::Done(usage)) => {
//!                 eprintln!("Tokens: {} in, {} out",
//!                     usage.input_tokens, usage.output_tokens);
//!             }
//!             Err(e) => return Err(e),
//!             _ => {}
//!         }
//!     }
//!
//!     Ok(result)
//! }
//! ```
//!
//! # Response Chunks
//!
//! Providers return a stream of [`ProviderResponseChunk`] variants:
//!
//! - [`ContentDelta`](ProviderResponseChunk::ContentDelta) — Incremental text content
//! - [`ReasoningDelta`](ProviderResponseChunk::ReasoningDelta) — Thinking/reasoning content
//! - [`ToolCallDelta`](ProviderResponseChunk::ToolCallDelta) — Tool call with arguments
//! - [`Done`](ProviderResponseChunk::Done) — Completion with token usage

use async_trait::async_trait;
use futures::Stream;
use std::fmt;
use std::pin::Pin;

use crate::error::LlmError;
use crate::types::{Message, Tool, Usage};
61
62/// A chunk of response data from an LLM provider.
63///
64/// Providers stream responses as a sequence of chunks, allowing for
65/// real-time display of generated content.
66#[derive(Debug, Clone)]
67pub enum ProviderResponseChunk {
68    /// Incremental text content from the assistant.
69    ///
70    /// Concatenate these deltas to build the complete response.
71    ContentDelta(String),
72
73    /// Incremental reasoning/thinking content.
74    ///
75    /// Some providers (Claude, z.ai) support extended thinking mode
76    /// where the model shows its reasoning process.
77    ReasoningDelta(String),
78
79    /// A tool call with accumulated arguments.
80    ///
81    /// The `arguments` field contains the JSON arguments parsed so far.
82    /// For streaming tool calls, arguments may be partially complete.
83    ToolCallDelta {
84        /// Unique identifier for this tool call.
85        id: String,
86
87        /// Name of the tool to call.
88        name: String,
89
90        /// JSON arguments (may be partial during streaming).
91        arguments: serde_json::Value,
92    },
93
94    /// Response completion with token usage statistics.
95    ///
96    /// This is always the final chunk in the stream.
97    Done(Usage),
98}
99
100/// Common trait for all LLM providers.
101///
102/// This trait provides a unified interface for sending messages to different
103/// LLM backends. All providers support streaming responses and optional tool calling.
104///
105/// # Implementation
106///
107/// ```rust,ignore
108/// use limit_llm::{LlmProvider, Message, Tool, ProviderResponseChunk, LlmError};
109/// use async_trait::async_trait;
110/// use futures::Stream;
111/// use std::pin::Pin;
112///
113/// struct MyProvider {
114///     // provider-specific fields
115/// }
116///
117/// #[async_trait]
118/// impl LlmProvider for MyProvider {
119///     async fn send(
120///         &self,
121///         messages: Vec<Message>,
122///         tools: Vec<Tool>,
123///     ) -> Result<
124///         Pin<Box<dyn Stream<Item = Result<ProviderResponseChunk, LlmError>> + Send + '_>>,
125///         LlmError,
126///     > {
127///         // Implementation that streams response chunks
128///     }
129///     
130///     fn provider_name(&self) -> &str { "my_provider" }
131///     fn model_name(&self) -> &str { "my-model" }
132///     fn clone_box(&self) -> Box<dyn LlmProvider> {
133///         Box::new(self.clone())
134///     }
135/// }
136/// ```
137#[async_trait]
138pub trait LlmProvider: Send + Sync {
139    /// Send messages to the LLM and receive a streaming response.
140    ///
141    /// # Arguments
142    ///
143    /// * `messages` — The conversation history as a vector of messages
144    /// * `tools` — Optional tool definitions for function calling
145    ///
146    /// # Returns
147    ///
148    /// A stream of response chunks. The stream will always end with a
149    /// [`Done`](ProviderResponseChunk::Done) chunk on success.
150    ///
151    /// # Errors
152    ///
153    /// Returns [`LlmError`] if the request fails before streaming begins.
154    #[allow(clippy::type_complexity)]
155    async fn send(
156        &self,
157        messages: Vec<Message>,
158        tools: Vec<Tool>,
159    ) -> Result<
160        Pin<Box<dyn Stream<Item = Result<ProviderResponseChunk, LlmError>> + Send + '_>>,
161        LlmError,
162    >;
163
164    /// Returns the provider name (e.g., "anthropic", "openai").
165    fn provider_name(&self) -> &str;
166
167    /// Returns the model name (e.g., "claude-3-5-sonnet-20241022").
168    fn model_name(&self) -> &str;
169
170    /// Clones the provider into a boxed trait object.
171    ///
172    /// This enables cloning of `Box<dyn LlmProvider>`.
173    fn clone_box(&self) -> Box<dyn LlmProvider>;
174}
175
176/// Implement Clone for `Box<dyn LlmProvider>`.
177impl Clone for Box<dyn LlmProvider> {
178    fn clone(&self) -> Self {
179        self.clone_box()
180    }
181}
182
183/// Provider configuration variants for TOML deserialization.
184///
185/// This enum is used in the main [`Config`](crate::Config) to specify
186/// which provider to use and its settings.
187#[derive(Debug, Clone, serde::Deserialize)]
188#[serde(tag = "provider", rename_all = "lowercase")]
189pub enum ProviderConfig {
190    /// Anthropic Claude configuration.
191    Anthropic(AnthropicConfig),
192
193    /// OpenAI GPT configuration.
194    OpenAI(OpenAIConfig),
195
196    /// Unknown/unsupported provider.
197    #[serde(other)]
198    Unknown,
199}
200
201/// Anthropic-specific configuration.
202///
203/// # Example TOML
204///
205/// ```toml
206/// provider = "anthropic"
207///
208/// [providers.anthropic]
209/// api_key = "sk-ant-api03-..."
210/// model = "claude-sonnet-4-6-20260217"
211/// max_tokens = 4096
212/// timeout = 60
213/// ```
214#[derive(Debug, Clone, serde::Deserialize)]
215pub struct AnthropicConfig {
216    /// API key (falls back to `ANTHROPIC_API_KEY` env var).
217    pub api_key: Option<String>,
218
219    /// Model to use (default: "claude-3-5-sonnet-20241022").
220    #[serde(default = "default_anthropic_model")]
221    pub model: String,
222
223    /// Maximum tokens in the response (default: 4096).
224    #[serde(default = "default_max_tokens")]
225    pub max_tokens: u32,
226
227    /// Request timeout in seconds (default: 60).
228    #[serde(default = "default_timeout")]
229    pub timeout: u64,
230
231    /// Custom API endpoint (optional).
232    #[serde(default)]
233    pub base_url: Option<String>,
234}
235
236/// OpenAI-specific configuration.
237///
238/// # Example TOML
239///
240/// ```toml
241/// provider = "openai"
242///
243/// [providers.openai]
244/// api_key = "sk-..."
245/// model = "gpt-5.4"
246/// max_tokens = 4096
247/// timeout = 60
248/// base_url = "http://localhost:8080/v1/chat/completions"
249/// ```
250#[derive(Debug, Clone, serde::Deserialize)]
251pub struct OpenAIConfig {
252    /// API key (falls back to `OPENAI_API_KEY` env var).
253    pub api_key: Option<String>,
254
255    /// Model to use (default: "gpt-4").
256    #[serde(default = "default_openai_model")]
257    pub model: String,
258
259    /// Maximum tokens in the response (default: 4096).
260    #[serde(default = "default_max_tokens")]
261    pub max_tokens: u32,
262
263    /// Request timeout in seconds (default: 60).
264    #[serde(default = "default_timeout")]
265    pub timeout: u64,
266
267    /// Custom API endpoint (optional).
268    /// Note: Must include full path (e.g., `/v1/chat/completions`).
269    #[serde(default)]
270    pub base_url: Option<String>,
271}
272
/// Serde default for `AnthropicConfig::model` when the key is absent.
fn default_anthropic_model() -> String {
    String::from("claude-3-5-sonnet-20241022")
}
276
/// Serde default for `OpenAIConfig::model` when the key is absent.
fn default_openai_model() -> String {
    String::from("gpt-4")
}
280
/// Serde default for the `max_tokens` field of both provider configs.
fn default_max_tokens() -> u32 {
    4096
}
284
/// Serde default for the `timeout` field (in seconds) of both provider configs.
fn default_timeout() -> u64 {
    60
}