// vtcode_core/llm/provider/provider_trait.rs
use async_stream::try_stream;
2use async_trait::async_trait;
3use compact_str::format_compact;
4use once_cell::sync::Lazy;
5use rustc_hash::FxHashMap;
6use std::sync::RwLock;
7use vtcode_commons::llm::BackendKind;
8
9use crate::types::CompactStr;
10
11use super::{
12 LLMNormalizedStream, LLMRequest, LLMResponse, LLMStream, LLMStreamEvent, Message,
13 ResponsesCompactionOptions,
14};
15pub use vtcode_commons::llm::{LLMError, LLMErrorMetadata};
16
/// Snapshot of the features one provider reports for one model.
///
/// Values are filled in by `ProviderCapabilities::detect`, which asks the
/// provider one question per field. The type is plain data, so it derives
/// `PartialEq`/`Eq` in addition to `Debug`/`Clone`; two snapshots can be
/// compared directly (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProviderCapabilities {
    /// Name returned by the provider's `name()`.
    pub provider_name: String,
    /// Model identifier the snapshot was taken for.
    pub model: String,
    /// Result of `supports_streaming()`.
    pub streaming: bool,
    /// Result of `supports_reasoning(model)`.
    pub reasoning: bool,
    /// Result of `supports_reasoning_effort(model)`.
    pub reasoning_effort: bool,
    /// Result of `supports_tools(model)`.
    pub tools: bool,
    /// Result of `supports_parallel_tool_config(model)`.
    pub parallel_tool_config: bool,
    /// Result of `supports_structured_output(model)`.
    pub structured_output: bool,
    /// Result of `supports_context_caching(model)`.
    pub context_caching: bool,
    /// Result of `supports_responses_compaction(model)`.
    pub responses_compaction: bool,
    /// Result of `supports_context_edits(model)`.
    pub context_edits: bool,
    /// Result of `supports_vision(model)`.
    pub vision: bool,
    /// Result of `effective_context_size(model)`; `summary()` prints it as a
    /// token count.
    pub context_size: usize,
}
34
35impl ProviderCapabilities {
36 pub fn detect(provider: &dyn LLMProvider, model: &str) -> Self {
37 Self {
38 provider_name: provider.name().to_string(),
39 model: model.to_string(),
40 streaming: provider.supports_streaming(),
41 reasoning: provider.supports_reasoning(model),
42 reasoning_effort: provider.supports_reasoning_effort(model),
43 tools: provider.supports_tools(model),
44 parallel_tool_config: provider.supports_parallel_tool_config(model),
45 structured_output: provider.supports_structured_output(model),
46 context_caching: provider.supports_context_caching(model),
47 responses_compaction: provider.supports_responses_compaction(model),
48 context_edits: provider.supports_context_edits(model),
49 vision: provider.supports_vision(model),
50 context_size: provider.effective_context_size(model),
51 }
52 }
53
54 pub fn has_advanced_features(&self) -> bool {
55 self.reasoning || self.structured_output || self.context_caching || self.reasoning_effort
56 }
57
58 pub fn summary(&self) -> String {
59 let mut features = Vec::new();
60
61 if self.streaming {
62 features.push("streaming");
63 }
64 if self.reasoning {
65 features.push("advanced-reasoning");
66 }
67 if self.reasoning_effort {
68 features.push("reasoning-effort");
69 }
70 if self.structured_output {
71 features.push("structured-output");
72 }
73 if self.context_caching {
74 features.push("context-caching");
75 }
76 if self.parallel_tool_config {
77 features.push("parallel-tools");
78 }
79 if self.responses_compaction {
80 features.push("responses-compaction");
81 }
82 if self.context_edits {
83 features.push("context-edits");
84 }
85
86 let features_str = if features.is_empty() {
87 "basic".to_string()
88 } else {
89 features.join(", ")
90 };
91
92 format!(
93 "{} ({} tokens): {}",
94 self.model, self.context_size, features_str
95 )
96 }
97}
98
99static CAPABILITY_CACHE: Lazy<RwLock<FxHashMap<CompactStr, ProviderCapabilities>>> =
101 Lazy::new(|| RwLock::new(FxHashMap::default()));
102
103pub fn get_cached_capabilities(provider: &dyn LLMProvider, model: &str) -> ProviderCapabilities {
105 let cache_key = format_compact!("{}::{}", provider.name(), model);
106
107 if let Ok(cache) = CAPABILITY_CACHE.read()
109 && let Some(caps) = cache.get(&cache_key)
110 {
111 return caps.clone();
112 }
113
114 let caps = ProviderCapabilities::detect(provider, model);
116
117 if let Ok(mut cache) = CAPABILITY_CACHE.write() {
119 cache.insert(cache_key, caps.clone());
120 }
121
122 caps
123}
124
125#[async_trait]
127pub trait LLMProvider: Send + Sync {
128 fn name(&self) -> &str;
130
131 fn backend_kind(&self) -> BackendKind {
137 match self.name() {
138 "gemini" => BackendKind::Gemini,
139 "openai" => BackendKind::OpenAI,
140 "anthropic" => BackendKind::Anthropic,
141 "deepseek" => BackendKind::DeepSeek,
142 "mistral" => BackendKind::Mistral,
143 "openrouter" => BackendKind::OpenRouter,
144 "ollama" => BackendKind::Ollama,
145 "llamacpp" => BackendKind::LlamaCpp,
146 "zai" => BackendKind::ZAI,
147 "moonshot" => BackendKind::Moonshot,
148 "huggingface" => BackendKind::HuggingFace,
149 "minimax" => BackendKind::Minimax,
150 "mimo" => BackendKind::MiMo,
151 "opencode-zen" => BackendKind::OpenCodeZen,
152 "opencode-go" => BackendKind::OpenCodeGo,
153 "qwen" => BackendKind::Qwen,
154 "stepfun" => BackendKind::StepFun,
155 "evolink" => BackendKind::Evolink,
156 "poolside" => BackendKind::Poolside,
157 _ => BackendKind::OpenAI,
158 }
159 }
160
161 fn supports_streaming(&self) -> bool {
163 false
164 }
165
166 fn supports_non_streaming(&self, _model: &str) -> bool {
168 true
169 }
170
171 fn supports_reasoning(&self, _model: &str) -> bool {
173 false
174 }
175
176 fn supports_reasoning_effort(&self, _model: &str) -> bool {
178 false
179 }
180
181 fn supports_tools(&self, _model: &str) -> bool {
183 true
184 }
185
186 fn supports_parallel_tool_config(&self, _model: &str) -> bool {
188 false
189 }
190
191 fn supports_structured_output(&self, _model: &str) -> bool {
193 false
194 }
195
196 fn supports_context_caching(&self, _model: &str) -> bool {
198 false
199 }
200
201 fn supports_vision(&self, _model: &str) -> bool {
203 false
204 }
205
206 fn supports_responses_compaction(&self, _model: &str) -> bool {
208 false
209 }
210
211 fn supports_context_edits(&self, _model: &str) -> bool {
214 false
215 }
216
217 fn supports_manual_openai_compaction(&self, _model: &str) -> bool {
222 false
223 }
224
225 fn manual_openai_compaction_unavailable_message(&self, model: &str) -> String {
227 format!(
228 "Manual `/compact` is available only for the native OpenAI provider on api.openai.com with a Responses-compatible OpenAI model. Active provider/backend/model: {} / provider does not expose native OpenAI manual compaction / {}.",
229 self.name(),
230 model,
231 )
232 }
233
234 fn effective_context_size(&self, _model: &str) -> usize {
236 128_000
238 }
239
240 async fn compact_history(
243 &self,
244 _model: &str,
245 _history: &[Message],
246 ) -> Result<Vec<Message>, LLMError> {
247 Err(LLMError::Provider {
248 message: "Conversation compaction is not supported by this provider".to_string(),
249 metadata: None,
250 })
251 }
252
253 async fn compact_history_with_options(
255 &self,
256 _model: &str,
257 _history: &[Message],
258 _options: &ResponsesCompactionOptions,
259 ) -> Result<Vec<Message>, LLMError> {
260 Err(LLMError::Provider {
261 message: "manual OpenAI compaction is not supported by this provider".to_string(),
262 metadata: None,
263 })
264 }
265
266 async fn generate(&self, request: LLMRequest) -> Result<LLMResponse, LLMError>;
268
269 async fn stream(&self, request: LLMRequest) -> Result<LLMStream, LLMError> {
271 let response = self.generate(request).await?;
273 let stream = try_stream! {
274 yield LLMStreamEvent::Completed { response: Box::new(response) };
275 };
276 Ok(Box::pin(stream))
277 }
278
279 async fn stream_normalized(
281 &self,
282 request: LLMRequest,
283 ) -> Result<LLMNormalizedStream, LLMError> {
284 let mut legacy_stream = self.stream(request).await?;
285 let stream = try_stream! {
286 while let Some(event) = futures::StreamExt::next(&mut legacy_stream).await {
287 for normalized in event?.into_normalized() {
288 yield normalized;
289 }
290 }
291 };
292 Ok(Box::pin(stream))
293 }
294
295 fn start_copilot_prompt_session<'a>(
299 &'a self,
300 _request: LLMRequest,
301 _tools: &'a [super::ToolDefinition],
302 ) -> Option<crate::copilot::CopilotPromptSessionFuture<'a>> {
303 None
304 }
305
306 fn supported_models(&self) -> Vec<String>;
308
309 async fn get_balance(&self) -> Result<Option<vtcode_commons::llm::BalanceInfo>, LLMError> {
311 Ok(None)
312 }
313
314 fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError>;
316}