Skip to main content

vtcode_core/llm/
mod.rs

1//! # LLM Integration Layer
2//!
3//! This module provides a unified, modular interface for integrating multiple LLM providers
4//! with VT Code, supporting Gemini, OpenAI, Anthropic, xAI, DeepSeek, Z.AI, and Ollama.
5//!
6//! ## Architecture Overview
7//!
8//! The LLM layer is designed with several key principles:
9//!
10//! - **Unified Interface**: Single `AnyClient` trait for all providers
11//! - **Provider Agnostic**: Easy switching between providers
12//! - **Configuration Driven**: TOML-based provider configuration
13//! - **Error Handling**: Comprehensive error types and recovery
14//! - **Async Support**: Full async/await support for all operations
15//!
16//! ## Supported Providers
17//!
18//! | Provider | Status | Models |
19//! |----------|--------|---------|
20//! | Gemini | ✓ | gemini-3.1-pro-preview, gemini-3-flash-preview |
21//! | OpenAI | ✓ | gpt-5, o3, o4-mini, gpt-5-mini, gpt-5-nano |
22//! | Anthropic | ✓ | claude-4.1-opus, claude-4-sonnet |
23//! | xAI | ✓ | grok-2-latest, grok-2-mini |
24//! | DeepSeek | ✓ | deepseek-chat, deepseek-reasoner |
25//! | Z.AI | ✓ | glm-5 |
26//! | Ollama | ✓ | gpt-oss:20b (local) |
27//!
28//! ## Basic Usage
29//!
30//! ```rust,no_run
31//! use vtcode_core::llm::{AnyClient, make_client};
32//! use vtcode_core::utils::dot_config::ProviderConfigs;
33//!
34//! #[tokio::main]
35//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
36//!     // Configure providers
37//!     let providers = ProviderConfigs {
38//!         gemini: Some(vtcode_core::utils::dot_config::ProviderConfig {
39//!             api_key: std::env::var("GEMINI_API_KEY")?,
40//!             model: "gemini-3-flash-preview".to_string(),
41//!             ..Default::default()
42//!         }),
43//!         ..Default::default()
44//!     };
45//!
46//!     // Create client
47//!     let client = make_client(&providers, "gemini")?;
48//!
49//!     // Make a request
50//!     let messages = vec![
51//!         vtcode_core::llm::types::Message {
52//!             role: "user".to_string(),
53//!             content: "Hello, how can you help me with coding?".to_string(),
54//!         }
55//!     ];
56//!
57//!     let response = client.chat(&messages, None).await?;
58//!     println!("Response: {}", response.content);
59//!
60//!     Ok(())
61//! }
62//! ```
63//!
64//! ## Provider Configuration
65//!
66//! ```rust,no_run
67//! use vtcode_core::utils::dot_config::{ProviderConfigs, ProviderConfig};
68//!
69//! let config = ProviderConfigs {
70//!     gemini: Some(ProviderConfig {
71//!         api_key: "your-api-key".to_string(),
72//!         model: "gemini-3-flash-preview".to_string(),
73//!         temperature: Some(0.7),
74//!         max_tokens: Some(4096),
75//!         ..Default::default()
76//!     }),
77//!     openai: Some(ProviderConfig {
78//!         api_key: "your-openai-key".to_string(),
79//!         model: "gpt-5".to_string(),
80//!         temperature: Some(0.3),
81//!         max_tokens: Some(8192),
82//!         ..Default::default()
83//!     }),
84//!     ..Default::default()
85//! };
86//! ```
87//!
88//! ## Advanced Features
89//!
90//! ### Streaming Responses
91//! ```rust,no_run
92//! use vtcode_core::llm::{AnyClient, make_client};
93//! use futures::StreamExt;
94//!
95//! let client = make_client(&providers, "gemini")?;
96//!
97//! let mut stream = client.chat_stream(&messages, None).await?;
98//! while let Some(chunk) = stream.next().await {
99//!     match chunk {
100//!         Ok(response) => print!("{}", response.content),
101//!         Err(e) => eprintln!("Error: {}", e),
102//!     }
103//! }
104//! ```
105//!
106//! ### Function Calling
107//! ```rust,no_run
108//! use vtcode_core::llm::types::{FunctionDeclaration, FunctionCall};
109//!
110//! let functions = vec![
111//!     FunctionDeclaration {
112//!         name: "read_file".to_string(),
113//!         description: "Read a file from the filesystem".to_string(),
114//!         parameters: serde_json::json!({
115//!             "type": "object",
116//!             "properties": {
117//!                 "path": {"type": "string", "description": "File path to read"}
118//!             },
119//!             "required": ["path"]
120//!         }),
121//!     }
122//! ];
123//!
124//! let response = client.chat_with_functions(&messages, &functions, None).await?;
125//!
126//! if let Some(function_call) = response.function_call {
127//!     match function_call.name.as_str() {
128//!         "read_file" => {
129//!             // Handle function call
130//!         }
131//!         _ => {}
132//!     }
133//! }
134//! ```
135//!
136//! ## Error Handling
137//!
138//! The LLM layer provides comprehensive error handling:
139//!
140//! ```rust,no_run
141//! use vtcode_core::llm::LLMError;
142//!
143//! match client.chat(&messages, None).await {
144//!     Ok(response) => println!("Success: {}", response.content),
145//!     Err(LLMError::Authentication) => eprintln!("Authentication failed"),
146//!     Err(LLMError::RateLimit { .. }) => eprintln!("Rate limit exceeded"),
147//!     Err(LLMError::Network { message: e, .. }) => eprintln!("Network error: {}", e),
148//!     Err(LLMError::Provider { message: e, .. }) => eprintln!("Provider error: {}", e),
149//!     Err(e) => eprintln!("Other error: {}", e),
150//! }
151//! ```
152//!
153//! ## Performance Considerations
154//!
155//! - **Connection Pooling**: Efficient connection reuse
156//! - **Request Batching**: Where supported by providers
157//! - **Caching**: Built-in prompt caching for repeated requests
158//! - **Timeout Handling**: Configurable timeouts and retries
159//! - **Rate Limiting**: Automatic rate limit handling
160//!
161//! ## LLM abstraction layer with modular architecture
162//!
163//! This module provides a unified interface for different LLM providers
164//! with provider-specific implementations.
165
166pub mod capabilities;
167pub mod cgp;
168pub mod client;
169pub mod error_display;
170pub mod factory;
171pub mod http_client; // Centralized HTTP client factory
172pub mod lightweight_routing;
173pub mod model_resolver;
174pub mod optimized_client;
175pub mod provider;
176pub mod provider_base; // Shared provider utilities to eliminate duplicate code
177pub mod provider_builder;
178pub mod provider_config;
179pub mod providers;
180pub mod rig_adapter;
181mod single_response;
182pub mod tool_bridge;
183pub mod types;
184pub mod utils; // Shared utilities for request/response processing
185
186#[cfg(test)]
187mod error_display_test;
188
189// Re-export main types for backward compatibility
190pub use capabilities::ProviderCapabilities;
191pub use client::{AnyClient, ProviderClientAdapter, make_client};
192pub use factory::{
193    create_provider_with_config, get_factory, get_models_manager, infer_provider_from_model,
194};
195pub use lightweight_routing::{
196    LightweightFeature, LightweightRouteResolution, LightweightRouteSource, ModelRoute,
197    auto_lightweight_model, create_provider_for_model_route, lightweight_model_choices,
198    main_model_route, resolve_api_key_for_model_route, resolve_lightweight_route,
199};
200pub use model_resolver::{
201    DynamicModelMeta, DynamicModelRef, ModelAvailability, ModelResolver, ResolvedModel,
202};
203pub use optimized_client::{OptimizedLLMClient, OptimizedRequest, OptimizedResponse};
204pub use provider::{FinishReason, LLMStream, LLMStreamEvent, Usage};
205pub use providers::{
206    AnthropicProvider, GeminiProvider, HuggingFaceProvider, OllamaProvider, OpenAIProvider,
207    ZAIProvider,
208};
209pub use single_response::collect_single_response;
210pub use tool_bridge::{
211    CorrelationStats, IntentFulfillment, MessageCorrelationTracker, MessageToolCorrelation,
212    ToolExecution, ToolIntent, ToolIntentExtractor,
213};
214
215pub use types::{BackendKind, LLMError, LLMResponse};