Skip to main content

vtcode_core/llm/provider/
provider_trait.rs

1use async_stream::try_stream;
2use async_trait::async_trait;
3use compact_str::format_compact;
4use once_cell::sync::Lazy;
5use rustc_hash::FxHashMap;
6use std::sync::RwLock;
7use vtcode_commons::llm::BackendKind;
8
9use crate::types::CompactStr;
10
11use super::{
12    LLMNormalizedStream, LLMRequest, LLMResponse, LLMStream, LLMStreamEvent, Message,
13    ResponsesCompactionOptions,
14};
15pub use vtcode_commons::llm::{LLMError, LLMErrorMetadata};
16
/// Cached provider capabilities to reduce repeated trait method calls.
///
/// A value of this type is a snapshot of what one provider reports for one
/// model. The field descriptions below describe the values filled in by
/// [`ProviderCapabilities::detect`]; the fields are public, so other code may
/// construct the struct differently.
#[derive(Debug, Clone)]
pub struct ProviderCapabilities {
    /// Provider name, as returned by `LLMProvider::name`.
    pub provider_name: String,
    /// Model identifier the capabilities were detected for.
    pub model: String,
    /// Provider has native streaming support (`supports_streaming`).
    pub streaming: bool,
    /// Provider surfaces structured reasoning traces for the model
    /// (`supports_reasoning`).
    pub reasoning: bool,
    /// Provider accepts configurable reasoning effort for the model
    /// (`supports_reasoning_effort`).
    pub reasoning_effort: bool,
    /// Provider supports structured tool calling for the model
    /// (`supports_tools`).
    pub tools: bool,
    /// Provider understands parallel tool configuration payloads
    /// (`supports_parallel_tool_config`).
    pub parallel_tool_config: bool,
    /// Provider supports structured output, i.e. JSON schema guarantees
    /// (`supports_structured_output`).
    pub structured_output: bool,
    /// Provider supports prompt/context caching (`supports_context_caching`).
    pub context_caching: bool,
    /// Provider supports Responses API server-side compaction
    /// (`supports_responses_compaction`).
    pub responses_compaction: bool,
    /// Provider supports provider-native context editing such as tool-result
    /// clearing (`supports_context_edits`).
    pub context_edits: bool,
    /// Provider supports vision (image analysis) for the model
    /// (`supports_vision`).
    pub vision: bool,
    /// Effective context window size (`effective_context_size`); rendered as
    /// a token count by `summary`.
    pub context_size: usize,
}
34
35impl ProviderCapabilities {
36    pub fn detect(provider: &dyn LLMProvider, model: &str) -> Self {
37        Self {
38            provider_name: provider.name().to_string(),
39            model: model.to_string(),
40            streaming: provider.supports_streaming(),
41            reasoning: provider.supports_reasoning(model),
42            reasoning_effort: provider.supports_reasoning_effort(model),
43            tools: provider.supports_tools(model),
44            parallel_tool_config: provider.supports_parallel_tool_config(model),
45            structured_output: provider.supports_structured_output(model),
46            context_caching: provider.supports_context_caching(model),
47            responses_compaction: provider.supports_responses_compaction(model),
48            context_edits: provider.supports_context_edits(model),
49            vision: provider.supports_vision(model),
50            context_size: provider.effective_context_size(model),
51        }
52    }
53
54    pub fn has_advanced_features(&self) -> bool {
55        self.reasoning || self.structured_output || self.context_caching || self.reasoning_effort
56    }
57
58    pub fn summary(&self) -> String {
59        let mut features = Vec::new();
60
61        if self.streaming {
62            features.push("streaming");
63        }
64        if self.reasoning {
65            features.push("advanced-reasoning");
66        }
67        if self.reasoning_effort {
68            features.push("reasoning-effort");
69        }
70        if self.structured_output {
71            features.push("structured-output");
72        }
73        if self.context_caching {
74            features.push("context-caching");
75        }
76        if self.parallel_tool_config {
77            features.push("parallel-tools");
78        }
79        if self.responses_compaction {
80            features.push("responses-compaction");
81        }
82        if self.context_edits {
83            features.push("context-edits");
84        }
85
86        let features_str = if features.is_empty() {
87            "basic".to_string()
88        } else {
89            features.join(", ")
90        };
91
92        format!(
93            "{} ({} tokens): {}",
94            self.model, self.context_size, features_str
95        )
96    }
97}
98
99/// Global cache for provider capabilities (provider_name::model -> capabilities)
100static CAPABILITY_CACHE: Lazy<RwLock<FxHashMap<CompactStr, ProviderCapabilities>>> =
101    Lazy::new(|| RwLock::new(FxHashMap::default()));
102
103/// Extract and cache provider capabilities for a given provider and model
104pub fn get_cached_capabilities(provider: &dyn LLMProvider, model: &str) -> ProviderCapabilities {
105    let cache_key = format_compact!("{}::{}", provider.name(), model);
106
107    // Check if already cached
108    if let Ok(cache) = CAPABILITY_CACHE.read()
109        && let Some(caps) = cache.get(&cache_key)
110    {
111        return caps.clone();
112    }
113
114    // Compute capabilities
115    let caps = ProviderCapabilities::detect(provider, model);
116
117    // Cache for future use
118    if let Ok(mut cache) = CAPABILITY_CACHE.write() {
119        cache.insert(cache_key, caps.clone());
120    }
121
122    caps
123}
124
125/// Universal LLM provider trait
126#[async_trait]
127pub trait LLMProvider: Send + Sync {
128    /// Provider name (e.g., "gemini", "openai", "anthropic")
129    fn name(&self) -> &str;
130
131    /// The canonical backend kind for this provider.
132    ///
133    /// Defaults to matching on [`name()`](LLMProvider::name) against the
134    /// well-known provider names. Providers should override this when their
135    /// name does not match the canonical mapping (e.g., dynamic names).
136    fn backend_kind(&self) -> BackendKind {
137        match self.name() {
138            "gemini" => BackendKind::Gemini,
139            "openai" => BackendKind::OpenAI,
140            "anthropic" => BackendKind::Anthropic,
141            "deepseek" => BackendKind::DeepSeek,
142            "mistral" => BackendKind::Mistral,
143            "openrouter" => BackendKind::OpenRouter,
144            "ollama" => BackendKind::Ollama,
145            "llamacpp" => BackendKind::LlamaCpp,
146            "zai" => BackendKind::ZAI,
147            "moonshot" => BackendKind::Moonshot,
148            "huggingface" => BackendKind::HuggingFace,
149            "minimax" => BackendKind::Minimax,
150            "mimo" => BackendKind::MiMo,
151            "opencode-zen" => BackendKind::OpenCodeZen,
152            "opencode-go" => BackendKind::OpenCodeGo,
153            "qwen" => BackendKind::Qwen,
154            "stepfun" => BackendKind::StepFun,
155            "evolink" => BackendKind::Evolink,
156            "poolside" => BackendKind::Poolside,
157            _ => BackendKind::OpenAI,
158        }
159    }
160
161    /// Whether the provider has native streaming support
162    fn supports_streaming(&self) -> bool {
163        false
164    }
165
166    /// Whether the provider can service non-streaming generation requests for the model.
167    fn supports_non_streaming(&self, _model: &str) -> bool {
168        true
169    }
170
171    /// Whether the provider surfaces structured reasoning traces for the given model
172    fn supports_reasoning(&self, _model: &str) -> bool {
173        false
174    }
175
176    /// Whether the provider accepts configurable reasoning effort for the model
177    fn supports_reasoning_effort(&self, _model: &str) -> bool {
178        false
179    }
180
181    /// Whether the provider supports structured tool calling for the given model
182    fn supports_tools(&self, _model: &str) -> bool {
183        true
184    }
185
186    /// Whether the provider understands parallel tool configuration payloads
187    fn supports_parallel_tool_config(&self, _model: &str) -> bool {
188        false
189    }
190
191    /// Whether the provider supports structured output (JSON schema guarantees)
192    fn supports_structured_output(&self, _model: &str) -> bool {
193        false
194    }
195
196    /// Whether the provider supports prompt/context caching
197    fn supports_context_caching(&self, _model: &str) -> bool {
198        false
199    }
200
201    /// Whether the provider supports vision (image analysis) for given model
202    fn supports_vision(&self, _model: &str) -> bool {
203        false
204    }
205
206    /// Whether the provider supports Responses API server-side compaction.
207    fn supports_responses_compaction(&self, _model: &str) -> bool {
208        false
209    }
210
211    /// Whether the provider supports provider-native context editing such as
212    /// tool-result clearing.
213    fn supports_context_edits(&self, _model: &str) -> bool {
214        false
215    }
216
217    /// Whether the provider supports the interactive manual `/compact` command path.
218    ///
219    /// This is narrower than general Responses compaction support and may exclude
220    /// compatible endpoints that do not match VT Code's native OpenAI UX contract.
221    fn supports_manual_openai_compaction(&self, _model: &str) -> bool {
222        false
223    }
224
225    /// Explain why the interactive manual `/compact` command path is unavailable.
226    fn manual_openai_compaction_unavailable_message(&self, model: &str) -> String {
227        format!(
228            "Manual `/compact` is available only for the native OpenAI provider on api.openai.com with a Responses-compatible OpenAI model. Active provider/backend/model: {} / provider does not expose native OpenAI manual compaction / {}.",
229            self.name(),
230            model,
231        )
232    }
233
234    /// Get the effective context window size for a model
235    fn effective_context_size(&self, _model: &str) -> usize {
236        // Default to 128k context window (common baseline)
237        128_000
238    }
239
240    /// Compact conversation history using provider-native Responses `/compact`
241    /// support when available.
242    async fn compact_history(
243        &self,
244        _model: &str,
245        _history: &[Message],
246    ) -> Result<Vec<Message>, LLMError> {
247        Err(LLMError::Provider {
248            message: "Conversation compaction is not supported by this provider".to_string(),
249            metadata: None,
250        })
251    }
252
253    /// Compact conversation history with standalone Responses compaction options.
254    async fn compact_history_with_options(
255        &self,
256        _model: &str,
257        _history: &[Message],
258        _options: &ResponsesCompactionOptions,
259    ) -> Result<Vec<Message>, LLMError> {
260        Err(LLMError::Provider {
261            message: "manual OpenAI compaction is not supported by this provider".to_string(),
262            metadata: None,
263        })
264    }
265
266    /// Generate completion
267    async fn generate(&self, request: LLMRequest) -> Result<LLMResponse, LLMError>;
268
269    /// Stream completion (optional)
270    async fn stream(&self, request: LLMRequest) -> Result<LLMStream, LLMError> {
271        // Default implementation falls back to non-streaming
272        let response = self.generate(request).await?;
273        let stream = try_stream! {
274            yield LLMStreamEvent::Completed { response: Box::new(response) };
275        };
276        Ok(Box::pin(stream))
277    }
278
279    /// Normalized streaming contract layered on top of the legacy provider stream.
280    async fn stream_normalized(
281        &self,
282        request: LLMRequest,
283    ) -> Result<LLMNormalizedStream, LLMError> {
284        let mut legacy_stream = self.stream(request).await?;
285        let stream = try_stream! {
286            while let Some(event) = futures::StreamExt::next(&mut legacy_stream).await {
287                for normalized in event?.into_normalized() {
288                    yield normalized;
289                }
290            }
291        };
292        Ok(Box::pin(stream))
293    }
294
295    /// Provider-specific streaming path that can service interactive runtime
296    /// requests while the stream is active. Copilot uses this to bridge ACP
297    /// tool calls and permission prompts back into VT Code's turn runtime.
298    fn start_copilot_prompt_session<'a>(
299        &'a self,
300        _request: LLMRequest,
301        _tools: &'a [super::ToolDefinition],
302    ) -> Option<crate::copilot::CopilotPromptSessionFuture<'a>> {
303        None
304    }
305
306    /// Get supported models
307    fn supported_models(&self) -> Vec<String>;
308
309    /// Fetch account balance for this provider, if supported.
310    async fn get_balance(&self) -> Result<Option<vtcode_commons::llm::BalanceInfo>, LLMError> {
311        Ok(None)
312    }
313
314    /// Validate request for this provider
315    fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError>;
316}