Skip to main content

vtcode_core/llm/providers/
lmstudio.rs

1//! LM Studio provider implementation
2//!
3//! LM Studio provides multiple API surfaces:
4//! - OpenAI-compatible endpoints at `/v1/*` (used by this implementation)
5//! - Native REST API at `/api/v0/*` (enhanced stats, model info, model management)
6//! - Tool calling (since 0.3.6), structured output, and reasoning content (since 0.3.9)
7//!
8//! This implementation uses OpenAI-compatible endpoints for maximum compatibility.
9//! The native REST API at `/api/v0/*` provides richer model metadata, load/unload
10//! endpoints, and TTL-based auto-evict for JIT-loaded models.
11//!
12//! See: <https://lmstudio.ai/docs/developer>
13
14use super::common::resolve_model;
15use super::openai::OpenAIProvider;
16use crate::config::TimeoutsConfig;
17use crate::config::constants::{env_vars, models, urls};
18use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
19use crate::llm::client::LLMClient;
20use crate::llm::error_display;
21use crate::llm::provider::{LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream};
22use crate::llm::providers::common::override_base_url;
23use crate::utils::http_client;
24use anyhow::Result;
25use async_trait::async_trait;
26use serde::{Deserialize, Serialize};
27
28pub mod client;
29
30pub use client::LMStudioClient;
31
32#[derive(Debug, Deserialize, Serialize)]
33struct LmStudioModelsResponse {
34    data: Vec<LmStudioModel>,
35}
36
37#[derive(Debug, Deserialize, Serialize)]
38struct LmStudioModel {
39    id: String,
40    #[serde(default)]
41    object: Option<String>,
42    #[serde(default)]
43    created: Option<u64>,
44    #[serde(default)]
45    owned_by: Option<String>,
46}
47
48pub use client::LMSTUDIO_CONNECTION_ERROR;
49
/// Returns the server root for an API base URL.
///
/// Trailing slashes are removed first, then a single trailing `/v1` segment is
/// dropped if present. For example `http://localhost:1234/v1` becomes
/// `http://localhost:1234`, which is where the native REST API (`/api/v0/*`)
/// is mounted. Inputs that do not end in `/v1` are returned with only the
/// trailing slashes removed.
fn server_root_from_api_base(api_base: &str) -> String {
    let without_slashes = api_base.trim_end_matches('/');
    match without_slashes.strip_suffix("/v1") {
        Some(root) => root.to_owned(),
        None => without_slashes.to_owned(),
    }
}
58
59/// Fetches available models from the LM Studio API endpoint
60///
61/// Uses OpenAI-compatible `/v1/models` endpoint by default.
62/// Set `LMSTUDIO_USE_NATIVE_API=true` to use native REST API at `/api/v0/models`.
63pub async fn fetch_lmstudio_models(base_url: Option<String>) -> Result<Vec<String>, anyhow::Error> {
64    let resolved_base_url = override_base_url(
65        urls::LMSTUDIO_API_BASE,
66        base_url,
67        Some(env_vars::LMSTUDIO_BASE_URL),
68    );
69
70    let use_native_api = std::env::var("LMSTUDIO_USE_NATIVE_API")
71        .ok()
72        .and_then(|v| v.parse::<bool>().ok())
73        .unwrap_or(false);
74
75    // LMSTUDIO_API_BASE already includes `/v1`, so for the OpenAI-compatible
76    // endpoint we append `/models` directly. For the native REST API we need
77    // the server root (without `/v1`) and then append `/api/v0/models`.
78    let models_url = if use_native_api {
79        let root = server_root_from_api_base(&resolved_base_url);
80        format!("{root}/api/v0/models")
81    } else {
82        format!("{}/models", resolved_base_url.trim_end_matches('/'))
83    };
84
85    // Create HTTP client with connection timeout
86    let client = http_client::create_client_with_timeout(std::time::Duration::from_secs(5));
87
88    // Make GET request to fetch models
89    let response = client
90        .get(&models_url)
91        .header("Content-Type", "application/json")
92        .send()
93        .await
94        .map_err(|e| {
95            tracing::warn!("Failed to connect to LM Studio server: {e:?}");
96            anyhow::anyhow!(LMSTUDIO_CONNECTION_ERROR)
97        })?;
98
99    if !response.status().is_success() {
100        return Err(anyhow::anyhow!(
101            "Failed to fetch LM Studio models: HTTP {}. {}",
102            response.status(),
103            if response.status() == reqwest::StatusCode::NOT_FOUND {
104                "Ensure LM Studio server is running with 'lms server start'."
105            } else {
106                ""
107            }
108        ));
109    }
110
111    // Parse the response
112    let models_response: LmStudioModelsResponse = response
113        .json()
114        .await
115        .map_err(|e| anyhow::anyhow!("Failed to parse LM Studio models response: {}", e))?;
116
117    // Extract model IDs
118    let model_ids: Vec<String> = models_response
119        .data
120        .into_iter()
121        .map(|model| model.id)
122        .collect();
123
124    Ok(model_ids)
125}
126
127pub struct LmStudioProvider {
128    inner: OpenAIProvider,
129}
130
131impl LmStudioProvider {
132    fn resolve_base_url(base_url: Option<String>) -> String {
133        override_base_url(
134            urls::LMSTUDIO_API_BASE,
135            base_url,
136            Some(env_vars::LMSTUDIO_BASE_URL),
137        )
138    }
139
140    fn build_inner(
141        api_key: Option<String>,
142        model: Option<String>,
143        base_url: Option<String>,
144        prompt_cache: Option<PromptCachingConfig>,
145        timeouts: Option<TimeoutsConfig>,
146        anthropic: Option<AnthropicConfig>,
147        model_behavior: Option<ModelConfig>,
148    ) -> OpenAIProvider {
149        let resolved_model = resolve_model(model, models::lmstudio::DEFAULT_MODEL);
150        let resolved_base = Self::resolve_base_url(base_url);
151        OpenAIProvider::from_config(
152            api_key,
153            None,
154            Some(resolved_model),
155            Some(resolved_base),
156            prompt_cache,
157            timeouts,
158            anthropic,
159            None,
160            model_behavior,
161        )
162    }
163
164    pub fn new(api_key: String) -> Self {
165        Self::with_model(api_key, models::lmstudio::DEFAULT_MODEL.to_string())
166    }
167
168    pub fn with_model(api_key: String, model: String) -> Self {
169        Self::with_model_internal(Some(api_key), Some(model), None, None, None)
170    }
171
172    pub fn new_with_client(
173        model: String,
174        http_client: reqwest::Client,
175        base_url: String,
176        timeouts: TimeoutsConfig,
177    ) -> Self {
178        let inner = OpenAIProvider::new_with_client(
179            "lm-studio".to_string(), // Dummy API key
180            None,
181            model,
182            http_client,
183            base_url,
184            timeouts,
185        );
186        Self { inner }
187    }
188
189    pub fn from_config(
190        api_key: Option<String>,
191        model: Option<String>,
192        base_url: Option<String>,
193        prompt_cache: Option<PromptCachingConfig>,
194        timeouts: Option<TimeoutsConfig>,
195        anthropic: Option<AnthropicConfig>,
196        model_behavior: Option<ModelConfig>,
197    ) -> Self {
198        let inner = Self::build_inner(
199            api_key,
200            model,
201            base_url,
202            prompt_cache,
203            timeouts,
204            anthropic,
205            model_behavior,
206        );
207        Self { inner }
208    }
209
210    fn with_model_internal(
211        api_key: Option<String>,
212        model: Option<String>,
213        base_url: Option<String>,
214        prompt_cache: Option<PromptCachingConfig>,
215        model_behavior: Option<ModelConfig>,
216    ) -> Self {
217        let inner = Self::build_inner(
218            api_key,
219            model,
220            base_url,
221            prompt_cache,
222            None,
223            None,
224            model_behavior,
225        );
226        Self { inner }
227    }
228}
229
230#[async_trait]
231impl LLMProvider for LmStudioProvider {
232    fn name(&self) -> &str {
233        "lmstudio"
234    }
235
236    fn supports_streaming(&self) -> bool {
237        self.inner.supports_streaming()
238    }
239
240    fn supports_reasoning(&self, model: &str) -> bool {
241        self.inner.supports_reasoning(model)
242    }
243
244    fn supports_reasoning_effort(&self, model: &str) -> bool {
245        self.inner.supports_reasoning_effort(model)
246    }
247
248    fn supports_tools(&self, model: &str) -> bool {
249        self.inner.supports_tools(model)
250    }
251
252    fn supports_parallel_tool_config(&self, model: &str) -> bool {
253        self.inner.supports_parallel_tool_config(model)
254    }
255
256    async fn generate(&self, request: LLMRequest) -> Result<LLMResponse, LLMError> {
257        self.inner.generate(request).await
258    }
259
260    async fn stream(&self, request: LLMRequest) -> Result<LLMStream, LLMError> {
261        self.inner.stream(request).await
262    }
263
264    fn supported_models(&self) -> Vec<String> {
265        // Hardcoded models prevent expensive network calls. Future enhancements:
266        // 1. Lazy initialization via once_cell to fetch models at startup
267        // 2. Dynamic fetching with proper caching to avoid repeated network calls
268        models::lmstudio::SUPPORTED_MODELS
269            .iter()
270            .map(|model| model.to_string())
271            .collect()
272    }
273
274    fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
275        if request.messages.is_empty() {
276            let formatted_error =
277                error_display::format_llm_error("LM Studio", "Messages cannot be empty");
278            return Err(LLMError::InvalidRequest {
279                message: formatted_error,
280                metadata: None,
281            });
282        }
283
284        // Validate messages against provider's requirements
285        for message in &request.messages {
286            if let Err(err) = message.validate_for_provider("openai") {
287                let formatted = error_display::format_llm_error("LM Studio", &err);
288                return Err(LLMError::InvalidRequest {
289                    message: formatted,
290                    metadata: None,
291                });
292            }
293        }
294
295        Ok(())
296    }
297}
298
299#[async_trait]
300impl LLMClient for LmStudioProvider {
301    async fn generate(&mut self, prompt: &str) -> Result<LLMResponse, LLMError> {
302        LLMClient::generate(&mut self.inner, prompt).await
303    }
304
305    fn model_id(&self) -> &str {
306        self.inner.model_id()
307    }
308}