lc/core/chat.rs

use crate::config::Config;
use crate::database::ChatEntry;
use crate::model_metadata::MetadataExtractor;
use crate::provider::{ChatRequest, Message, MessageContent, OpenAIClient};
use crate::token_utils::TokenCounter;
use anyhow::Result;
use chrono::{DateTime, Utc};

pub async fn send_chat_request_with_validation(
    client: &LLMClient,
    model: &str,
    prompt: &str,
    history: &[ChatEntry],
    system_prompt: Option<&str>,
    max_tokens: Option<u32>,
    temperature: Option<f32>,
    provider_name: &str,
    tools: Option<Vec<crate::provider::Tool>>,
) -> Result<(String, Option<i32>, Option<i32>)> {
    crate::debug_log!("Sending chat request - provider: '{}', model: '{}', prompt length: {}, history entries: {}",
                      provider_name, model, prompt.len(), history.len());
    crate::debug_log!(
        "Request parameters - max_tokens: {:?}, temperature: {:?}",
        max_tokens,
        temperature
    );

    // Try to get model metadata for context validation
    crate::debug_log!(
        "Loading model metadata for provider '{}', model '{}'",
        provider_name,
        model
    );
    let model_metadata = get_model_metadata(provider_name, model).await;

    if let Some(ref metadata) = model_metadata {
        crate::debug_log!(
            "Found metadata for model '{}' - context_length: {:?}, max_output: {:?}",
            model,
            metadata.context_length,
            metadata.max_output_tokens
        );
    } else {
        crate::debug_log!("No metadata found for model '{}'", model);
    }

    // Create token counter
    crate::debug_log!("Creating token counter for model '{}'", model);
    let token_counter = match TokenCounter::new(model) {
        Ok(counter) => {
            crate::debug_log!("Successfully created token counter for model '{}'", model);
            Some(counter)
        }
        Err(e) => {
            crate::debug_log!(
                "Failed to create token counter for model '{}': {}",
                model,
                e
            );
            eprintln!(
                "Warning: Failed to create token counter for model '{}': {}",
                model, e
            );
            None
        }
    };

    let mut final_prompt = prompt.to_string();
    let mut final_history = history.to_vec();
    let mut input_tokens = None;

    // Validate context size if we have both metadata and token counter
    if let (Some(metadata), Some(ref counter)) = (&model_metadata, &token_counter) {
        if let Some(context_limit) = metadata.context_length {
            // Check if input exceeds context limit
            if counter.exceeds_context_limit(prompt, system_prompt, history, context_limit) {
                println!(
                    "⚠️  Input exceeds model context limit ({}k tokens). Truncating...",
                    context_limit / 1000
                );

                // Truncate to fit within context limit
                let (truncated_prompt, truncated_history) = counter.truncate_to_fit(
                    prompt,
                    system_prompt,
                    history,
                    context_limit,
                    metadata.max_output_tokens,
                );

                final_prompt = truncated_prompt;
                final_history = truncated_history;

                if final_history.len() < history.len() {
                    println!(
                        "📝 Truncated conversation history from {} to {} messages",
                        history.len(),
                        final_history.len()
                    );
                }

                if final_prompt.len() < prompt.len() {
                    println!(
                        "✂️  Truncated prompt from {} to {} characters",
                        prompt.len(),
                        final_prompt.len()
                    );
                }
            }

            // Calculate input tokens after potential truncation
            input_tokens = Some(counter.estimate_chat_tokens(
                &final_prompt,
                system_prompt,
                &final_history,
            ) as i32);
        }
    } else if let Some(ref counter) = token_counter {
        // No metadata available, but we can still count tokens
        input_tokens =
            Some(counter.estimate_chat_tokens(&final_prompt, system_prompt, &final_history) as i32);
    }

    // Build messages for the request
    let mut messages = Vec::new();

    // Add system prompt if provided
    if let Some(sys_prompt) = system_prompt {
        messages.push(Message {
            role: "system".to_string(),
            content_type: MessageContent::Text {
                content: Some(sys_prompt.to_string()),
            },
            tool_calls: None,
            tool_call_id: None,
        });
    }

    // Add conversation history
    for entry in &final_history {
        messages.push(Message::user(entry.question.clone()));
        messages.push(Message::assistant(entry.response.clone()));
    }

    // Add current prompt
    messages.push(Message::user(final_prompt));

    let request = ChatRequest {
        model: model.to_string(),
        messages: messages.clone(),
        max_tokens: max_tokens.or(Some(1024)),
        temperature: temperature.or(Some(0.7)),
        tools,
        stream: None, // Non-streaming request
    };

    crate::debug_log!(
        "Sending chat request with {} messages, max_tokens: {:?}, temperature: {:?}",
        messages.len(),
        request.max_tokens,
        request.temperature
    );

    // Send the request
    crate::debug_log!("Making API call to chat endpoint...");
    let response = client.chat(&request).await?;

    crate::debug_log!(
        "Received response from chat API ({} characters)",
        response.len()
    );

    // Calculate output tokens if we have a token counter
    let output_tokens = if let Some(ref counter) = token_counter {
        Some(counter.count_tokens(&response) as i32)
    } else {
        None
    };

    // Display token usage if available
    if let (Some(input), Some(output)) = (input_tokens, output_tokens) {
        println!(
            "📊 Token usage: {} input + {} output = {} total",
            input,
            output,
            input + output
        );

        // Show cost estimate if we have pricing info
        if let Some(metadata) = &model_metadata {
            if let (Some(input_price), Some(output_price)) =
                (metadata.input_price_per_m, metadata.output_price_per_m)
            {
                let input_cost = (input as f64 / 1_000_000.0) * input_price;
                let output_cost = (output as f64 / 1_000_000.0) * output_price;
                let total_cost = input_cost + output_cost;
                println!(
                    "💰 Estimated cost: ${:.6} (${:.6} input + ${:.6} output)",
                    total_cost, input_cost, output_cost
                );
            }
        }
    }

    Ok((response, input_tokens, output_tokens))
}
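
// Illustrative usage sketch (not part of the original file): how a caller
// might drive send_chat_request_with_validation with a pre-built client.
// The provider name, model id, and prompt are hypothetical placeholders.
#[allow(dead_code)]
async fn example_send_validated(client: &LLMClient) -> Result<()> {
    let (reply, input_tokens, output_tokens) = send_chat_request_with_validation(
        client,
        "gpt-4o-mini",                         // hypothetical model id
        "Summarize the latest release notes.", // user prompt
        &[],                                   // no prior history
        Some("You are a concise assistant."),  // system prompt
        Some(512),                             // max_tokens
        Some(0.2),                             // temperature
        "openai",                              // hypothetical provider name
        None,                                  // no tools
    )
    .await?;
    println!("{} ({:?} in / {:?} out)", reply, input_tokens, output_tokens);
    Ok(())
}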

pub async fn send_chat_request_with_streaming(
    client: &LLMClient,
    model: &str,
    prompt: &str,
    history: &[ChatEntry],
    system_prompt: Option<&str>,
    max_tokens: Option<u32>,
    temperature: Option<f32>,
    provider_name: &str,
    tools: Option<Vec<crate::provider::Tool>>,
) -> Result<()> {
    crate::debug_log!("Sending streaming chat request - provider: '{}', model: '{}', prompt length: {}, history entries: {}",
                      provider_name, model, prompt.len(), history.len());
    crate::debug_log!(
        "Request parameters - max_tokens: {:?}, temperature: {:?}",
        max_tokens,
        temperature
    );

    // Try to get model metadata for context validation
    crate::debug_log!(
        "Loading model metadata for provider '{}', model '{}'",
        provider_name,
        model
    );
    let model_metadata = get_model_metadata(provider_name, model).await;

    if let Some(ref metadata) = model_metadata {
        crate::debug_log!(
            "Found metadata for model '{}' - context_length: {:?}, max_output: {:?}",
            model,
            metadata.context_length,
            metadata.max_output_tokens
        );
    } else {
        crate::debug_log!("No metadata found for model '{}'", model);
    }

    // Create token counter
    crate::debug_log!("Creating token counter for model '{}'", model);
    let token_counter = match TokenCounter::new(model) {
        Ok(counter) => {
            crate::debug_log!("Successfully created token counter for model '{}'", model);
            Some(counter)
        }
        Err(e) => {
            crate::debug_log!(
                "Failed to create token counter for model '{}': {}",
                model,
                e
            );
            eprintln!(
                "Warning: Failed to create token counter for model '{}': {}",
                model, e
            );
            None
        }
    };

    let mut final_prompt = prompt.to_string();
    let mut final_history = history.to_vec();

    // Validate context size if we have both metadata and token counter
    if let (Some(metadata), Some(ref counter)) = (&model_metadata, &token_counter) {
        if let Some(context_limit) = metadata.context_length {
            // Check if input exceeds context limit
            if counter.exceeds_context_limit(prompt, system_prompt, history, context_limit) {
                println!(
                    "⚠️  Input exceeds model context limit ({}k tokens). Truncating...",
                    context_limit / 1000
                );

                // Truncate to fit within context limit
                let (truncated_prompt, truncated_history) = counter.truncate_to_fit(
                    prompt,
                    system_prompt,
                    history,
                    context_limit,
                    metadata.max_output_tokens,
                );

                final_prompt = truncated_prompt;
                final_history = truncated_history;

                if final_history.len() < history.len() {
                    println!(
                        "📝 Truncated conversation history from {} to {} messages",
                        history.len(),
                        final_history.len()
                    );
                }

                if final_prompt.len() < prompt.len() {
                    println!(
                        "✂️  Truncated prompt from {} to {} characters",
                        prompt.len(),
                        final_prompt.len()
                    );
                }
            }
        }
    }

    // Build messages for the request
    let mut messages = Vec::new();

    // Add system prompt if provided
    if let Some(sys_prompt) = system_prompt {
        messages.push(Message {
            role: "system".to_string(),
            content_type: MessageContent::Text {
                content: Some(sys_prompt.to_string()),
            },
            tool_calls: None,
            tool_call_id: None,
        });
    }

    // Add conversation history
    for entry in &final_history {
        messages.push(Message::user(entry.question.clone()));
        messages.push(Message::assistant(entry.response.clone()));
    }

    // Add current prompt
    messages.push(Message::user(final_prompt));

    let request = ChatRequest {
        model: model.to_string(),
        messages: messages.clone(),
        max_tokens: max_tokens.or(Some(1024)),
        temperature: temperature.or(Some(0.7)),
        tools,
        stream: Some(true), // Enable streaming
    };

    crate::debug_log!(
        "Sending streaming chat request with {} messages, max_tokens: {:?}, temperature: {:?}",
        messages.len(),
        request.max_tokens,
        request.temperature
    );

    // Send the streaming request
    crate::debug_log!("Making streaming API call to chat endpoint...");
    client.chat_stream(&request).await?;

    Ok(())
}

async fn get_model_metadata(
    provider_name: &str,
    model_name: &str,
) -> Option<crate::model_metadata::ModelMetadata> {
    let filename = format!("models/{}.json", provider_name);

    if !std::path::Path::new(&filename).exists() {
        return None;
    }

    match tokio::fs::read_to_string(&filename).await {
        Ok(json_content) => {
            match MetadataExtractor::extract_from_provider(provider_name, &json_content) {
                Ok(models) => models.into_iter().find(|m| m.id == model_name),
                Err(_) => None,
            }
        }
        Err(_) => None,
    }
}
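
// Illustrative sketch (not part of the original file): reading cached metadata
// for one model. Assumes a models/openai.json snapshot exists in the working
// directory; the provider and model ids are hypothetical.
#[allow(dead_code)]
async fn example_metadata_lookup() {
    if let Some(meta) = get_model_metadata("openai", "gpt-4o-mini").await {
        println!(
            "context: {:?} tokens, max output: {:?} tokens",
            meta.context_length, meta.max_output_tokens
        );
    }
}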

pub async fn get_or_refresh_token(
    config: &mut Config,
    provider_name: &str,
    client: &OpenAIClient,
) -> Result<String> {
    // If provider is configured for Google SA JWT (Vertex AI), use JWT Bearer flow
    let provider = config.get_provider_with_auth(provider_name)?.clone();
    let is_vertex = provider
        .endpoint
        .to_lowercase()
        .contains("aiplatform.googleapis.com")
        || provider.auth_type.as_deref() == Some("google_sa_jwt");
    // If we have a cached token that has not expired, use it (the Google path
    // below already stores expires_at with a 60s safety margin)
    if let Some(cached_token) = config.get_cached_token(provider_name) {
        if Utc::now() < cached_token.expires_at {
            return Ok(cached_token.token.clone());
        }
    }

    if is_vertex {
        // Google OAuth 2.0 JWT Bearer flow
        let token_url = provider
            .token_url
            .clone()
            .unwrap_or_else(|| "https://oauth2.googleapis.com/token".to_string());

        // Parse Service Account JSON from api_key
        let api_key_raw = provider.api_key.clone().ok_or_else(|| {
            anyhow::anyhow!(
                "Service Account JSON not set for '{}'. Run lc k a {} and paste SA JSON.",
                provider_name,
                provider_name
            )
        })?;
        #[derive(serde::Deserialize)]
        struct GoogleSA {
            #[serde(rename = "type")]
            sa_type: String,
            client_email: String,
            private_key: String,
        }
        let sa: GoogleSA = serde_json::from_str(&api_key_raw)
            .map_err(|e| anyhow::anyhow!("Invalid Service Account JSON: {}", e))?;
        if sa.sa_type != "service_account" {
            anyhow::bail!("Provided key is not a service_account");
        }

        // Build JWT
        #[derive(serde::Serialize)]
        struct Claims<'a> {
            iss: &'a str,
            scope: &'a str,
            aud: &'a str,
            exp: i64,
            iat: i64,
        }
        let now = Utc::now().timestamp();
        let claims = Claims {
            iss: &sa.client_email,
            scope: "https://www.googleapis.com/auth/cloud-platform",
            aud: &token_url,
            iat: now,
            exp: now + 3600,
        };
        let header = jsonwebtoken::Header::new(jsonwebtoken::Algorithm::RS256);
        let key = jsonwebtoken::EncodingKey::from_rsa_pem(sa.private_key.as_bytes())
            .map_err(|e| anyhow::anyhow!("Failed to load RSA key: {}", e))?;
        let assertion = jsonwebtoken::encode(&header, &claims, &key)
            .map_err(|e| anyhow::anyhow!("JWT encode failed: {}", e))?;

        // Exchange for access token
        #[derive(serde::Deserialize)]
        struct GoogleTokenResp {
            access_token: String,
            expires_in: i64,
            #[allow(dead_code)]
            token_type: String,
        }
        let http = reqwest::Client::new();
        let resp = http
            .post(&token_url)
            .form(&[
                ("grant_type", "urn:ietf:params:oauth:grant-type:jwt-bearer"),
                ("assertion", assertion.as_str()),
            ])
            .send()
            .await
            .map_err(|e| anyhow::anyhow!("Token exchange error: {}", e))?;
        if !resp.status().is_success() {
            let status = resp.status();
            let txt = resp.text().await.unwrap_or_default();
            anyhow::bail!("Token exchange failed ({}): {}", status, txt);
        }
        let token_json: GoogleTokenResp = resp
            .json()
            .await
            .map_err(|e| anyhow::anyhow!("Failed to parse token response: {}", e))?;
        let expires_at = DateTime::from_timestamp(now + token_json.expires_in - 60, 0)
            .ok_or_else(|| anyhow::anyhow!("Invalid expires timestamp"))?;
        config.set_cached_token(
            provider_name.to_string(),
            token_json.access_token.clone(),
            expires_at,
        )?;
        config.save()?;
        return Ok(token_json.access_token);
    }

    // Fallback: GitHub-style token endpoint using existing client helper
    let token_url = match config.get_token_url(provider_name) {
        Some(url) => url.clone(),
        None => {
            let provider_config = config.get_provider_with_auth(provider_name)?;
            return provider_config.api_key.clone().ok_or_else(|| {
                anyhow::anyhow!(
                    "No API key or token URL configured for provider '{}'",
                    provider_name
                )
            });
        }
    };

    let token_response = client.get_token_from_url(&token_url).await?;
    let expires_at = DateTime::from_timestamp(token_response.expires_at, 0).ok_or_else(|| {
        anyhow::anyhow!(
            "Invalid expires_at timestamp: {}",
            token_response.expires_at
        )
    })?;
    config.set_cached_token(
        provider_name.to_string(),
        token_response.token.clone(),
        expires_at,
    )?;
    config.save()?;
    Ok(token_response.token)
}

// All providers now use OpenAIClient with template-based transformations
pub type LLMClient = OpenAIClient;

// Hardcoded conversion functions removed - now using template-based transformations

pub async fn create_authenticated_client(
    config: &mut Config,
    provider_name: &str,
) -> Result<LLMClient> {
    crate::debug_log!(
        "Creating authenticated client for provider '{}'",
        provider_name
    );

    // Get provider config with authentication from centralized keys
    let mut provider_config = config.get_provider_with_auth(provider_name)?;

    crate::debug_log!(
        "Provider '{}' config - endpoint: {}, models_path: {}, chat_path: {}",
        provider_name,
        provider_config.endpoint,
        provider_config.models_path,
        provider_config.chat_path
    );

    // Normalize chat_path placeholders: support both {model} and legacy {model_name}
    let normalized_chat_path = provider_config.chat_path.replace("{model_name}", "{model}");
    provider_config.chat_path = normalized_chat_path;

    // All providers now use OpenAIClient with template-based transformations
    // Check if this needs OAuth authentication (Vertex AI)
    let needs_oauth = provider_config
        .endpoint
        .contains("aiplatform.googleapis.com")
        || provider_config.auth_type.as_deref() == Some("google_sa_jwt");

    if needs_oauth {
        // OAuth authentication flow (Vertex AI)
        let temp_client = OpenAIClient::new_with_headers(
            provider_config.endpoint.clone(),
            provider_config.api_key.clone().unwrap_or_default(),
            provider_config.models_path.clone(),
            provider_config.chat_path.clone(),
            provider_config.headers.clone(),
        );

        let auth_token = get_or_refresh_token(config, provider_name, &temp_client).await?;

        // Create custom headers with Authorization
        let mut oauth_headers = provider_config.headers.clone();
        oauth_headers.insert(
            "Authorization".to_string(),
            format!("Bearer {}", auth_token),
        );

        let client = OpenAIClient::new_with_provider_config(
            provider_config.endpoint.clone(),
            auth_token,
            provider_config.models_path.clone(),
            provider_config.chat_path.clone(),
            oauth_headers,
            provider_config.clone(),
        );

        return Ok(client);
    }

    // Regular authentication flow (API key or token URL)
    // Special-case: if headers already contain resolved auth (e.g., x-goog-api-key), we don't need a token
    let header_has_resolved_key = provider_config.headers.iter().any(|(k, v)| {
        let k_l = k.to_lowercase();
        // Heuristic: header name suggests auth AND value is not a placeholder and not empty
        (k_l.contains("key") || k_l.contains("token") || k_l.contains("auth"))
            && !v.trim().is_empty()
            && !v.contains("${api_key}")
    });

    if provider_config.api_key.is_none() && header_has_resolved_key {
        // Header-based auth present (e.g., Gemini x-goog-api-key). No token retrieval needed.
        // Pass empty api_key since Authorization won't be used when custom headers exist.
        let client = OpenAIClient::new_with_provider_config(
            provider_config.endpoint.clone(),
            String::new(),
            provider_config.models_path.clone(),
            provider_config.chat_path.clone(),
            provider_config.headers.clone(),
            provider_config.clone(),
        );
        return Ok(client);
    }

    // Fallback: API key in Authorization or token URL-based auth
    let temp_client = OpenAIClient::new_with_headers(
        provider_config.endpoint.clone(),
        provider_config.api_key.clone().unwrap_or_default(),
        provider_config.models_path.clone(),
        provider_config.chat_path.clone(),
        provider_config.headers.clone(),
    );

    let auth_token = get_or_refresh_token(config, provider_name, &temp_client).await?;

    let client = OpenAIClient::new_with_provider_config(
        provider_config.endpoint.clone(),
        auth_token,
        provider_config.models_path.clone(),
        provider_config.chat_path.clone(),
        provider_config.headers.clone(),
        provider_config.clone(),
    );

    Ok(client)
}
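
// Illustrative sketch (not part of the original file): building a client once
// and streaming a reply through it. The provider name and model id are
// hypothetical placeholders for whatever is configured locally.
#[allow(dead_code)]
async fn example_stream_with_client(config: &mut Config) -> Result<()> {
    let client = create_authenticated_client(config, "openai").await?;
    send_chat_request_with_streaming(
        &client,
        "gpt-4o-mini", // hypothetical model id
        "Hello!",
        &[],      // no history
        None,     // no system prompt
        None,     // default max_tokens
        None,     // default temperature
        "openai", // hypothetical provider name
        None,     // no tools
    )
    .await
}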

// New function to handle tool execution loop
pub async fn send_chat_request_with_tool_execution(
    client: &LLMClient,
    model: &str,
    prompt: &str,
    history: &[ChatEntry],
    system_prompt: Option<&str>,
    max_tokens: Option<u32>,
    temperature: Option<f32>,
    _provider_name: &str,
    tools: Option<Vec<crate::provider::Tool>>,
    mcp_server_names: &[&str],
) -> Result<(String, Option<i32>, Option<i32>)> {
    use crate::provider::{ChatRequest, Message};
    use crate::token_utils::TokenCounter;

    let mut conversation_messages = Vec::new();
    let mut total_input_tokens = 0i32;
    let mut total_output_tokens = 0i32;

    // Create token counter for tracking usage
    let token_counter = TokenCounter::new(model).ok();

    // Add system prompt if provided
    if let Some(sys_prompt) = system_prompt {
        conversation_messages.push(Message {
            role: "system".to_string(),
            content_type: MessageContent::Text {
                content: Some(sys_prompt.to_string()),
            },
            tool_calls: None,
            tool_call_id: None,
        });
    }

    // Add conversation history
    for entry in history {
        conversation_messages.push(Message::user(entry.question.clone()));
        conversation_messages.push(Message::assistant(entry.response.clone()));
    }

    // Add current prompt
    conversation_messages.push(Message::user(prompt.to_string()));
    let max_iterations = 10; // Prevent infinite loops
    let mut iteration = 0;

    loop {
        iteration += 1;
        if iteration > max_iterations {
            anyhow::bail!(
                "Maximum tool execution iterations reached ({})",
                max_iterations
            );
        }

        crate::debug_log!("Tool execution iteration {}/{}", iteration, max_iterations);

        let request = ChatRequest {
            model: model.to_string(),
            messages: conversation_messages.clone(),
            max_tokens: max_tokens.or(Some(1024)),
            temperature: temperature.or(Some(0.7)),
            tools: tools.clone(),
            stream: None, // Non-streaming request for tool execution
        };

        // Make the API call
        let response = client.chat_with_tools(&request).await?;

        // Rough input-token accounting: this estimate covers only the system
        // prompt, not the full conversation, so the reported total is a lower bound
        if let Some(ref counter) = token_counter {
            let input_tokens = counter.estimate_chat_tokens("", system_prompt, &[]) as i32;
            total_input_tokens += input_tokens;
        }

        if let Some(choice) = response.choices.first() {
            crate::debug_log!(
                "Response choice - tool_calls: {}, content: {}",
                choice.message.tool_calls.as_ref().map_or(0, |tc| tc.len()),
                choice
                    .message
                    .content
                    .as_ref()
                    .map_or("None", |c| if c.len() > 50 { &c[..50] } else { c })
            );

            // Check if the LLM made tool calls
            if let Some(tool_calls) = &choice.message.tool_calls {
                if !tool_calls.is_empty() {
                    crate::debug_log!(
                        "LLM made {} tool calls in iteration {}",
                        tool_calls.len(),
                        iteration
                    );

                    // Add the assistant's tool call message to conversation
                    conversation_messages
                        .push(Message::assistant_with_tool_calls(tool_calls.clone()));

                    // Execute each tool call
                    for (i, tool_call) in tool_calls.iter().enumerate() {
                        crate::debug_log!(
                            "Executing tool call {}/{}: {} with args: {}",
                            i + 1,
                            tool_calls.len(),
                            tool_call.function.name,
                            tool_call.function.arguments
                        );

                        // Find which MCP server has this function using daemon client
                        let daemon_client = crate::mcp_daemon::DaemonClient::new()?;
                        let mut tool_result = None;
                        for server_name in mcp_server_names {
                            // Parse arguments as JSON value instead of Vec<String>
                            let args_value: serde_json::Value =
                                serde_json::from_str(&tool_call.function.arguments)?;
                            match daemon_client
                                .call_tool(server_name, &tool_call.function.name, args_value)
                                .await
                            {
                                Ok(result) => {
                                    crate::debug_log!(
                                        "Tool call successful on server '{}': {}",
                                        server_name,
                                        serde_json::to_string(&result)
                                            .unwrap_or_else(|_| "invalid json".to_string())
                                    );
                                    tool_result = Some(format_tool_result(&result));
                                    break;
                                }
                                Err(e) => {
                                    crate::debug_log!(
                                        "Tool call failed on server '{}': {}",
                                        server_name,
                                        e
                                    );
                                    continue;
                                }
                            }
                        }

                        let result_content = tool_result.unwrap_or_else(|| {
                            format!(
                                "Error: Function '{}' not found on any MCP server",
                                tool_call.function.name
                            )
                        });

                        crate::debug_log!(
                            "Tool result for {}: {}",
                            tool_call.function.name,
                            if result_content.len() > 100 {
                                format!("{}...", &result_content[..100])
                            } else {
                                result_content.clone()
                            }
                        );

                        // Add tool result to conversation
                        conversation_messages
                            .push(Message::tool_result(tool_call.id.clone(), result_content));
                    }

                    // Continue the loop to get the LLM's response to the tool results
                    continue;
                } else {
                    // Empty tool_calls array - check if we have content (final answer)
                    if let Some(content) = &choice.message.content {
                        if !content.trim().is_empty() {
                            crate::debug_log!("LLM provided final answer with empty tool_calls after {} iterations: {}",
                                             iteration, if content.len() > 100 {
                                                 format!("{}...", &content[..100])
                                             } else {
                                                 content.clone()
                                             });

                            // Track output tokens
                            if let Some(ref counter) = token_counter {
                                total_output_tokens += counter.count_tokens(content) as i32;
                            }

                            // Exit immediately when LLM provides content (final answer)
                            return Ok((
                                content.clone(),
                                Some(total_input_tokens),
                                Some(total_output_tokens),
                            ));
                        }
                    }
                }
            } else if let Some(content) = &choice.message.content {
                // LLM provided a final answer without tool calls field
                crate::debug_log!(
                    "LLM provided final answer without tool_calls field after {} iterations: {}",
                    iteration,
                    if content.len() > 100 {
                        format!("{}...", &content[..100])
                    } else {
                        content.clone()
                    }
                );

                // Track output tokens
                if let Some(ref counter) = token_counter {
                    total_output_tokens += counter.count_tokens(content) as i32;
                }

                // Exit immediately when LLM provides content (final answer)
                return Ok((
                    content.clone(),
                    Some(total_input_tokens),
                    Some(total_output_tokens),
                ));
            } else {
                // LLM provided neither tool calls nor content - this shouldn't happen
                crate::debug_log!(
                    "LLM provided neither tool calls nor content in iteration {}",
                    iteration
                );
                anyhow::bail!(
                    "No content or tool calls in response from LLM in iteration {}",
                    iteration
                );
            }
        } else {
            anyhow::bail!("No response from API");
        }
    }
}
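
// Illustrative sketch (not part of the original file): driving the tool loop
// against a single MCP server. The model id, provider name, server name, and
// prompt are hypothetical placeholders.
#[allow(dead_code)]
async fn example_tool_loop(
    client: &LLMClient,
    tools: Vec<crate::provider::Tool>,
) -> Result<String> {
    let (answer, _input_tokens, _output_tokens) = send_chat_request_with_tool_execution(
        client,
        "gpt-4o-mini",
        "What's the weather in Paris right now?",
        &[],
        None,
        None,
        None,
        "openai",
        Some(tools),
        &["weather-server"], // hypothetical MCP server name
    )
    .await?;
    Ok(answer)
}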

// Helper function to format tool result for display
fn format_tool_result(result: &serde_json::Value) -> String {
    const MAX_TOOL_RESULT_LENGTH: usize = 10000; // Limit tool results to ~10KB
    const TRUNCATION_MESSAGE: &str = "\n\n[Content truncated - exceeded maximum length]";

    if let Some(content_array) = result.get("content") {
        if let Some(content_items) = content_array.as_array() {
            let mut formatted = String::new();
            for item in content_items {
                if let Some(text) = item.get("text") {
                    if let Some(text_str) = text.as_str() {
                        // Check if adding this text would exceed the limit
                        if formatted.len() + text_str.len() > MAX_TOOL_RESULT_LENGTH {
                            // Add as much as we can, backing up to a char boundary
                            // so the slice can't panic on multi-byte UTF-8
                            let mut end = MAX_TOOL_RESULT_LENGTH
                                .saturating_sub(formatted.len())
                                .min(text_str.len());
                            while !text_str.is_char_boundary(end) {
                                end -= 1;
                            }
                            formatted.push_str(&text_str[..end]);
                            formatted.push_str(TRUNCATION_MESSAGE);
                            break; // Stop processing more items
                        } else {
                            formatted.push_str(text_str);
                            formatted.push('\n');
                        }
                    }
                }
            }
            return formatted.trim().to_string();
        }
    }

    // Fallback to pretty-printed JSON (also with char-boundary-safe truncation)
    let json_result = serde_json::to_string_pretty(result)
        .unwrap_or_else(|_| "Error formatting result".to_string());

    if json_result.len() > MAX_TOOL_RESULT_LENGTH {
        let mut end = MAX_TOOL_RESULT_LENGTH;
        while !json_result.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}{}", &json_result[..end], TRUNCATION_MESSAGE)
    } else {
        json_result
    }
}
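
// Worked example (not part of the original file): a small test showing how
// format_tool_result flattens an MCP-style `content` array of text items.
// The JSON payload here is made up for illustration.
#[cfg(test)]
mod format_tool_result_tests {
    use super::format_tool_result;

    #[test]
    fn joins_text_items_with_newlines() {
        let result = serde_json::json!({
            "content": [
                { "type": "text", "text": "line one" },
                { "type": "text", "text": "line two" }
            ]
        });
        assert_eq!(format_tool_result(&result), "line one\nline two");
    }
}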

// Message-based versions of the chat functions for handling multimodal content

pub async fn send_chat_request_with_validation_messages(
    client: &LLMClient,
    model: &str,
    messages: &[Message],
    system_prompt: Option<&str>,
    max_tokens: Option<u32>,
    temperature: Option<f32>,
    provider_name: &str,
    tools: Option<Vec<crate::provider::Tool>>,
) -> Result<(String, Option<i32>, Option<i32>)> {
    crate::debug_log!(
        "Sending chat request with messages - provider: '{}', model: '{}', messages: {}",
        provider_name,
        model,
        messages.len()
    );

    // Build final messages including system prompt if needed
    let mut final_messages = Vec::new();

    // Add system prompt if provided and not already in messages
    if let Some(sys_prompt) = system_prompt {
        let has_system = messages.iter().any(|m| m.role == "system");
        if !has_system {
            final_messages.push(Message {
                role: "system".to_string(),
                content_type: MessageContent::Text {
                    content: Some(sys_prompt.to_string()),
                },
                tool_calls: None,
                tool_call_id: None,
            });
        }
    }

    // Add all provided messages
    final_messages.extend_from_slice(messages);

    let request = ChatRequest {
        model: model.to_string(),
        messages: final_messages,
        max_tokens: max_tokens.or(Some(1024)),
        temperature: temperature.or(Some(0.7)),
        tools,
        stream: None,
    };

    let response = client.chat(&request).await?;

    // For now, return None for token counts as we'd need to implement multimodal token counting
    Ok((response, None, None))
}

pub async fn send_chat_request_with_streaming_messages(
    client: &LLMClient,
    model: &str,
    messages: &[Message],
    system_prompt: Option<&str>,
    max_tokens: Option<u32>,
    temperature: Option<f32>,
    provider_name: &str,
    tools: Option<Vec<crate::provider::Tool>>,
) -> Result<()> {
    crate::debug_log!(
        "Sending streaming chat request with messages - provider: '{}', model: '{}', messages: {}",
        provider_name,
        model,
        messages.len()
    );

    // Build final messages including system prompt if needed
    let mut final_messages = Vec::new();

    // Add system prompt if provided and not already in messages
    if let Some(sys_prompt) = system_prompt {
        let has_system = messages.iter().any(|m| m.role == "system");
        if !has_system {
            final_messages.push(Message {
                role: "system".to_string(),
                content_type: MessageContent::Text {
                    content: Some(sys_prompt.to_string()),
                },
                tool_calls: None,
                tool_call_id: None,
            });
        }
    }

    // Add all provided messages
    final_messages.extend_from_slice(messages);

    let request = ChatRequest {
        model: model.to_string(),
        messages: final_messages,
        max_tokens: max_tokens.or(Some(1024)),
        temperature: temperature.or(Some(0.7)),
        tools,
        stream: Some(true),
    };

    client.chat_stream(&request).await?;

    Ok(())
}

pub async fn send_chat_request_with_tool_execution_messages(
    client: &LLMClient,
    model: &str,
    messages: &[Message],
    system_prompt: Option<&str>,
    max_tokens: Option<u32>,
    temperature: Option<f32>,
    provider_name: &str,
    tools: Option<Vec<crate::provider::Tool>>,
    mcp_server_names: &[&str],
) -> Result<(String, Option<i32>, Option<i32>)> {
    crate::debug_log!("Sending chat request with tool execution and messages - provider: '{}', model: '{}', messages: {}",
                      provider_name, model, messages.len());

    let mut conversation_messages = Vec::new();

    // Add system prompt if provided and not already in messages
    if let Some(sys_prompt) = system_prompt {
        let has_system = messages.iter().any(|m| m.role == "system");
        if !has_system {
            conversation_messages.push(Message {
                role: "system".to_string(),
                content_type: MessageContent::Text {
                    content: Some(sys_prompt.to_string()),
                },
                tool_calls: None,
                tool_call_id: None,
            });
        }
    }

    // Add all provided messages
    conversation_messages.extend_from_slice(messages);

    let max_iterations = 10;
    let mut iteration = 0;

    loop {
        iteration += 1;
        if iteration > max_iterations {
            anyhow::bail!(
                "Maximum tool execution iterations reached ({})",
                max_iterations
            );
        }

        let request = ChatRequest {
            model: model.to_string(),
            messages: conversation_messages.clone(),
            max_tokens: max_tokens.or(Some(1024)),
            temperature: temperature.or(Some(0.7)),
            tools: tools.clone(),
            stream: None,
        };

        let response = client.chat_with_tools(&request).await?;

        if let Some(choice) = response.choices.first() {
            if let Some(tool_calls) = &choice.message.tool_calls {
                if !tool_calls.is_empty() {
                    conversation_messages
                        .push(Message::assistant_with_tool_calls(tool_calls.clone()));

                    for tool_call in tool_calls {
                        let daemon_client = crate::mcp_daemon::DaemonClient::new()?;
                        let mut tool_result = None;

                        for server_name in mcp_server_names {
                            let args_value: serde_json::Value =
                                serde_json::from_str(&tool_call.function.arguments)?;
                            match daemon_client
                                .call_tool(server_name, &tool_call.function.name, args_value)
                                .await
                            {
                                Ok(result) => {
                                    tool_result = Some(format_tool_result(&result));
                                    break;
                                }
                                Err(_) => continue,
                            }
                        }

                        let result_content = tool_result.unwrap_or_else(|| {
                            format!(
                                "Error: Function '{}' not found on any MCP server",
                                tool_call.function.name
                            )
                        });

                        conversation_messages
                            .push(Message::tool_result(tool_call.id.clone(), result_content));
                    }

                    continue;
                }
            }

            if let Some(content) = &choice.message.content {
                return Ok((content.clone(), None, None));
            }
        }

        anyhow::bail!("No response from API");
    }
}
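
// Illustrative sketch (not part of the original file): calling the
// message-based entry point directly with pre-built messages, as a caller
// holding multimodal content might. Provider and model ids are hypothetical.
#[allow(dead_code)]
async fn example_send_messages(client: &LLMClient) -> Result<String> {
    let messages = vec![Message::user("Describe this image.".to_string())];
    let (reply, _input_tokens, _output_tokens) = send_chat_request_with_validation_messages(
        client,
        "gpt-4o-mini",
        &messages,
        Some("You are a helpful assistant."),
        Some(512),
        Some(0.2),
        "openai",
        None,
    )
    .await?;
    Ok(reply)
}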