multi_llm/providers/lmstudio.rs

//! LM Studio provider implementation
//!
//! LM Studio exposes an OpenAI-compatible API, so this implementation leverages
//! the shared OpenAI structures and utilities.

use super::openai_shared::{
    http::OpenAICompatibleClient, utils::apply_config_to_request, OpenAIRequest, OpenAIResponse,
};
// Providers implement their own request methods directly; there is no shared LLMClientCore trait.
use crate::config::{DefaultLLMParams, LMStudioConfig};
use crate::error::{LlmError, LlmResult};
#[cfg(feature = "events")]
use crate::internals::events::{event_types, BusinessEvent, EventScope};
use crate::internals::response_parser::ResponseParser;
use crate::logging::log_debug;
use crate::messages::{MessageContent, MessageRole, UnifiedLLMRequest, UnifiedMessage};
#[cfg(feature = "events")]
use crate::provider::LLMBusinessEvent;
use crate::provider::{LlmProvider, RequestConfig, Response, TokenUsage, ToolCallingRound};
use std::time::Instant;

/// LM Studio local provider implementation
///
/// Uses the OpenAI-compatible API endpoints for local model inference
#[derive(Debug)]
pub struct LMStudioProvider {
    http_client: OpenAICompatibleClient,
    config: LMStudioConfig,
    default_params: DefaultLLMParams,
}

impl LMStudioProvider {
    /// Create a new LM Studio provider instance
    ///
    /// # Errors
    ///
    /// Returns [`LlmError::ConfigurationError`] if:
    /// - Base URL is missing or invalid
    /// - Provider configuration validation fails
    /// - HTTP client initialization fails
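    ///
    /// # Example
    ///
    /// A minimal construction sketch. It assumes both config types implement
    /// `Default`, which may not hold in this crate; adjust to however configs
    /// are actually built here (marked `ignore` accordingly).
    ///
    /// ```ignore
    /// use crate::config::{DefaultLLMParams, LMStudioConfig};
    ///
    /// let config = LMStudioConfig {
    ///     // LM Studio's local server listens on port 1234 by default
    ///     base_url: "http://localhost:1234".to_string(),
    ///     ..LMStudioConfig::default()
    /// };
    /// let provider = LMStudioProvider::new(config, DefaultLLMParams::default())
    ///     .expect("valid LM Studio configuration");
    /// ```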
    pub fn new(config: LMStudioConfig, default_params: DefaultLLMParams) -> LlmResult<Self> {
        if config.base_url.is_empty() {
            return Err(LlmError::configuration_error(
                "LM Studio base URL is required",
            ));
        }

        log_debug!(
            provider = "lmstudio",
            base_url = %config.base_url,
            max_context_tokens = config.max_context_tokens,
            default_temperature = default_params.temperature,
            "LM Studio provider initialized"
        );

        Ok(Self {
            http_client: OpenAICompatibleClient::with_retry_policy(config.retry_policy.clone()),
            config,
            default_params,
        })
    }

    /// Create base OpenAI-compatible request
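    ///
    /// Uses the configured default model and sampling parameters; `stream`,
    /// `tools`, and `response_format` start as `None` and are only populated
    /// later (see `execute_llm_internal`, which applies the request config and
    /// response schema).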
    fn create_base_request(&self, request: &UnifiedLLMRequest) -> OpenAIRequest {
        let openai_messages = self.transform_unified_messages(&request.get_sorted_messages());

        OpenAIRequest {
            model: self.config.default_model.clone(),
            messages: openai_messages,
            temperature: Some(self.default_params.temperature),
            max_tokens: Some(self.default_params.max_tokens),
            top_p: Some(self.default_params.top_p),
            stream: None,
            presence_penalty: None,
            tools: None,
            tool_choice: None,
            response_format: None,
        }
    }

    /// Send request to LM Studio API
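    ///
    /// Posts JSON to `{base_url}/v1/chat/completions`, mirroring the OpenAI
    /// chat completions endpoint that LM Studio serves locally.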
    async fn send_lmstudio_request(&self, request: &OpenAIRequest) -> LlmResult<OpenAIResponse> {
        let url = format!("{}/v1/chat/completions", self.config.base_url);
        let mut headers = reqwest::header::HeaderMap::new();
        headers.insert(
            reqwest::header::CONTENT_TYPE,
            reqwest::header::HeaderValue::from_static("application/json"),
        );

        self.http_client
            .execute_chat_request(&url, &headers, request)
            .await
            .map_err(|e| {
                LlmError::request_failed(format!("LM Studio API error: {}", e), Some(Box::new(e)))
            })
    }

    /// Apply response schema to request
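    ///
    /// When a schema is provided, `response_format` is set to an OpenAI-style
    /// `json_schema` object with `strict` enabled; LM Studio's OpenAI-compatible
    /// server accepts this format for structured output.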
    fn apply_response_schema(
        &self,
        request: &mut OpenAIRequest,
        schema: Option<serde_json::Value>,
    ) {
        if let Some(schema) = schema {
            request.response_format = Some(super::openai_shared::OpenAIResponseFormat {
                format_type: "json_schema".to_string(),
                json_schema: Some(super::openai_shared::OpenAIJsonSchema {
                    name: "structured_response".to_string(),
                    schema,
                    strict: Some(true),
                }),
            });
        }
    }

    /// Create LLM request business event
    #[cfg(feature = "events")]
    fn create_request_event(&self, model: &str, user_id: &str) -> LLMBusinessEvent {
        let event = BusinessEvent::new(event_types::LLM_REQUEST)
            .with_metadata("provider", "lmstudio")
            .with_metadata("model", model);

        LLMBusinessEvent {
            event,
            scope: EventScope::User(user_id.to_string()),
        }
    }

    /// Create LLM error business event
    #[cfg(feature = "events")]
    fn create_error_event(&self, error: &LlmError, user_id: &str) -> LLMBusinessEvent {
        let event = BusinessEvent::new(event_types::LLM_ERROR)
            .with_metadata("provider", "lmstudio")
            .with_metadata("error", error.to_string());

        LLMBusinessEvent {
            event,
            scope: EventScope::User(user_id.to_string()),
        }
    }

    /// Create LLM response business event
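    ///
    /// Returns `None` when the config carries no `user_id`, since business
    /// events are scoped per user.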
    #[cfg(feature = "events")]
    fn create_response_event(
        &self,
        response: &OpenAIResponse,
        duration_ms: u64,
        config: Option<&RequestConfig>,
    ) -> Option<LLMBusinessEvent> {
        let user_id = config.and_then(|c| c.user_id.as_ref())?;

        let usage_tokens = response
            .usage
            .as_ref()
            .map(|u| (u.prompt_tokens, u.completion_tokens));
        let mut event = BusinessEvent::new(event_types::LLM_RESPONSE)
            .with_metadata("provider", "lmstudio")
            .with_metadata("model", &self.config.default_model)
            .with_metadata("input_tokens", usage_tokens.map(|(i, _)| i).unwrap_or(0))
            .with_metadata("output_tokens", usage_tokens.map(|(_, o)| o).unwrap_or(0))
            .with_metadata("duration_ms", duration_ms);

        if let Some(sess_id) = config.and_then(|c| c.session_id.as_ref()) {
            event = event.with_metadata("session_id", sess_id);
        }

        Some(LLMBusinessEvent {
            event,
            scope: EventScope::User(user_id.clone()),
        })
    }

    /// Core LLM execution logic shared between events and non-events versions
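    ///
    /// Returns the parsed [`Response`] together with the raw API response, the
    /// request duration in milliseconds, and the outgoing request so callers
    /// can build business events without re-running the request.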
    async fn execute_llm_internal(
        &self,
        request: UnifiedLLMRequest,
        config: Option<RequestConfig>,
    ) -> crate::provider::Result<(Response, OpenAIResponse, u64, OpenAIRequest)> {
        // Create the base request and apply the per-request config
        let mut openai_request = self.create_base_request(&request);
        if let Some(cfg) = config.as_ref() {
            apply_config_to_request(&mut openai_request, Some(cfg.clone()));
        }
        self.apply_response_schema(&mut openai_request, request.response_schema);

        log_debug!(
            provider = "lmstudio",
            request_json = %serde_json::to_string(&openai_request).unwrap_or_default(),
            "Executing LLM request"
        );

        // Clone the request for event creation
        let openai_request_for_events = openai_request.clone();

        // Send to the LM Studio API
        let start_time = Instant::now();
        let api_response = self.send_lmstudio_request(&openai_request).await?;
        let duration_ms = start_time.elapsed().as_millis() as u64;

        // Parse the response
        let response = self.parse_lmstudio_response(api_response.clone())?;

        Ok((
            response,
            api_response,
            duration_ms,
            openai_request_for_events,
        ))
    }

    /// Internal method for the executor pattern - restore the default retry policy
    pub(crate) async fn restore_default_retry_policy(&self) {
        // The LM Studio provider doesn't need explicit retry-policy restoration;
        // the client manages retry state internally.
    }
}

#[async_trait::async_trait]
impl LlmProvider for LMStudioProvider {
    #[cfg(feature = "events")]
    async fn execute_llm(
        &self,
        request: UnifiedLLMRequest,
        _current_tool_round: Option<ToolCallingRound>,
        config: Option<RequestConfig>,
    ) -> crate::provider::Result<(Response, Vec<LLMBusinessEvent>)> {
        let mut events = Vec::new();

        // Execute the core logic and collect event data
        let (response, api_response, duration_ms, openai_request) =
            match self.execute_llm_internal(request, config.clone()).await {
                Ok(result) => result,
                Err(e) => {
                    // On error, record an error event before propagating
                    if let Some(uid) = config.as_ref().and_then(|c| c.user_id.as_ref()) {
                        events.push(self.create_error_event(&e, uid));
                    }
                    return Err(e);
                }
            };

        // Record the request event
        if let Some(uid) = config.as_ref().and_then(|c| c.user_id.as_ref()) {
            events.push(self.create_request_event(&openai_request.model, uid));
        }

        // Record the response event
        if let Some(event) = self.create_response_event(&api_response, duration_ms, config.as_ref())
        {
            events.push(event);
        }

        Ok((response, events))
    }

    #[cfg(not(feature = "events"))]
    async fn execute_llm(
        &self,
        request: UnifiedLLMRequest,
        _current_tool_round: Option<ToolCallingRound>,
        config: Option<RequestConfig>,
    ) -> crate::provider::Result<Response> {
        let (response, _api_response, _duration_ms, _openai_request) =
            self.execute_llm_internal(request, config).await?;
        Ok(response)
    }

    #[cfg(feature = "events")]
    async fn execute_structured_llm(
        &self,
        mut request: UnifiedLLMRequest,
        current_tool_round: Option<ToolCallingRound>,
        schema: serde_json::Value,
        config: Option<RequestConfig>,
    ) -> crate::provider::Result<(Response, Vec<LLMBusinessEvent>)> {
        // Set the schema in the request
        request.response_schema = Some(schema);

        // Execute with the schema-enabled request (returns a tuple with events)
        self.execute_llm(request, current_tool_round, config).await
    }

    #[cfg(not(feature = "events"))]
    async fn execute_structured_llm(
        &self,
        mut request: UnifiedLLMRequest,
        current_tool_round: Option<ToolCallingRound>,
        schema: serde_json::Value,
        config: Option<RequestConfig>,
    ) -> crate::provider::Result<Response> {
        // Set the schema in the request
        request.response_schema = Some(schema);

        // Execute with the schema-enabled request
        self.execute_llm(request, current_tool_round, config).await
    }

    fn provider_name(&self) -> &'static str {
        "lmstudio"
    }
}

impl LMStudioProvider {
    /// Transform unified messages to OpenAI-compatible format for LM Studio.
    /// LM Studio has no caching support, so caching attributes are ignored.
    fn transform_unified_messages(
        &self,
        messages: &[&UnifiedMessage],
    ) -> Vec<super::openai_shared::OpenAIMessage> {
        messages
            .iter()
            .map(|msg| self.unified_message_to_openai(msg))
            .collect()
    }

    /// Convert a UnifiedMessage to OpenAI format for LM Studio.
    /// Note: LM Studio has no caching support, so cacheable attributes are ignored.
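    ///
    /// Role mapping: System → "system", User → "user", Assistant → "assistant",
    /// and Tool → "user", since tool results are inlined as plain user text.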
    fn unified_message_to_openai(
        &self,
        msg: &UnifiedMessage,
    ) -> super::openai_shared::OpenAIMessage {
        let role = match msg.role {
            MessageRole::System => "system".to_string(),
            MessageRole::User => "user".to_string(),
            MessageRole::Assistant => "assistant".to_string(),
            MessageRole::Tool => "user".to_string(), // LM Studio has no native tool role; use user
        };

        let content = match &msg.content {
            MessageContent::Text(text) => text.clone(),
            MessageContent::Json(value) => serde_json::to_string_pretty(value).unwrap_or_default(),
            MessageContent::ToolCall { .. } => {
                // We should never send tool calls TO the LLM
                log_debug!(provider = "lmstudio", "Unexpected ToolCall in outgoing message - tool calls are received from the LLM, not sent to it");
                "Error: Invalid message type".to_string()
            }
            MessageContent::ToolResult {
                content, is_error, ..
            } => {
                // Tool results become user messages for LM Studio
                if *is_error {
                    format!("Tool execution error: {}", content)
                } else {
                    format!("Tool execution result: {}", content)
                }
            }
        };

        super::openai_shared::OpenAIMessage { role, content }
    }

    /// Parse an LM Studio response into a [`Response`].
    /// LM Studio uses the OpenAI-compatible response format.
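    ///
    /// Content that looks like JSON (leading `{`) is additionally run through
    /// `ResponseParser` to populate `structured_response`; parse failures are
    /// tolerated because local models often emit imperfect JSON.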
    fn parse_lmstudio_response(&self, response: OpenAIResponse) -> LlmResult<Response> {
        let choice =
            response.choices.into_iter().next().ok_or_else(|| {
                LlmError::response_parsing_error("No choices in LM Studio response")
            })?;

        let content = choice.message.content;

        // LM Studio may have limited tool support; handle missing tool calls gracefully
        let tool_calls = choice
            .message
            .tool_calls
            .unwrap_or_default()
            .into_iter()
            .map(|tc| crate::provider::ToolCall {
                id: tc.id,
                name: tc.function.name,
                // Malformed argument JSON degrades to Null rather than failing the whole response
                arguments: serde_json::from_str(&tc.function.arguments)
                    .unwrap_or(serde_json::Value::Null),
            })
            .collect();

        // LM Studio may not provide usage stats
        let usage = response.usage.map(|u| TokenUsage {
            prompt_tokens: u.prompt_tokens,
            completion_tokens: u.completion_tokens,
            total_tokens: u.total_tokens,
        });

        // Handle structured response parsing if needed;
        // local models may be less reliable with JSON formatting
        let structured_response = if content.trim_start().starts_with('{') {
            match ResponseParser::parse_llm_output(&content) {
                Ok(json_value) => {
                    log_debug!(
                        provider = "lmstudio",
                        "Successfully parsed structured JSON response"
                    );
                    Some(json_value)
                }
                Err(_) => {
                    log_debug!(provider = "lmstudio", "Failed to parse structured response from local model - this is common with local LLMs");
                    None
                }
            }
        } else {
            None
        };

        Ok(Response {
            content,
            structured_response,
            tool_calls,
            usage,
            model: Some(self.config.default_model.clone()),
            raw_body: None,
        })
    }
}