1use anyhow::{anyhow, Result};
7use serde::{Deserialize, Serialize};
8
9use crate::rag::RagSearchResult;
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
/// Configuration for LLM-backed answer synthesis: provider credentials,
/// provider selection/fallback order, and per-request parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmConfig {
    /// OpenAI API key; treated as unset when `None` or empty.
    pub openai_api_key: Option<String>,
    /// Anthropic API key; treated as unset when `None` or empty.
    pub anthropic_api_key: Option<String>,
    /// Groq API key; treated as unset when `None` or empty.
    pub groq_api_key: Option<String>,
    /// OpenRouter API key; treated as unset when `None` or empty.
    pub openrouter_api_key: Option<String>,
    /// HuggingFace API key; treated as unset when `None` or empty.
    pub huggingface_api_key: Option<String>,
    /// Base URL of an OpenAI-compatible endpoint;
    /// `/v1/chat/completions` is appended when requests are made.
    pub custom_endpoint: Option<String>,
    /// Provider to try first; `Auto` means "pick the first available".
    pub preferred_provider: LlmProvider,
    /// Providers tried in order when the preferred one is unavailable or fails.
    pub fallback_providers: Vec<LlmProvider>,
    /// HTTP request timeout applied to the underlying client.
    pub timeout_seconds: u64,
    /// Maximum completion tokens requested from the provider.
    pub max_tokens: u32,
    /// Sampling temperature forwarded to the provider.
    pub temperature: f32,
    /// Explicit model override; when `None`, a per-provider default is used.
    pub model_name: Option<String>,
    /// Whether to request streaming responses (forwarded to some providers).
    pub streaming: bool,
}
28
impl Default for LlmConfig {
    /// Conservative defaults: no credentials configured, automatic provider
    /// selection, a 30s timeout, and short, low-temperature completions.
    fn default() -> Self {
        Self {
            // No keys/endpoint by default; the client is unusable until at
            // least one credential is supplied.
            openai_api_key: None,
            anthropic_api_key: None,
            groq_api_key: None,
            openrouter_api_key: None,
            huggingface_api_key: None,
            custom_endpoint: None,
            preferred_provider: LlmProvider::Auto,
            // Order matters: fallbacks are tried front to back.
            fallback_providers: vec![
                LlmProvider::OpenAI,
                LlmProvider::Anthropic,
                LlmProvider::Groq,
                LlmProvider::OpenRouter,
            ],
            timeout_seconds: 30,
            max_tokens: 1000,
            // Low temperature favors deterministic, factual answers.
            temperature: 0.1,
            model_name: None,
            streaming: false,
        }
    }
}
53
/// Supported LLM backends. `Auto` is a selection directive ("pick the best
/// available provider"), not a callable backend itself.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum LlmProvider {
    /// Automatically select the first available provider.
    Auto,
    OpenAI,
    Anthropic,
    Groq,
    OpenRouter,
    HuggingFace,
    /// OpenAI-compatible endpoint configured via `LlmConfig::custom_endpoint`.
    Custom,
}
65
/// A synthesized answer plus metadata about how it was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
    /// The final user-facing answer text (thinking preambles stripped).
    pub answer: String,
    /// IDs of all search results supplied to the model.
    pub sources_used: Vec<String>,
    /// Heuristic, provider-specific static confidence score.
    pub confidence: Option<f32>,
    /// Which backend actually produced the answer.
    pub provider_used: LlmProvider,
    /// Model identifier that was requested.
    pub model_used: String,
    /// Token usage as reported by the provider, when available.
    pub tokens_used: Option<u32>,
    /// Wall-clock round-trip time; stamped by the caller after the request.
    pub response_time_ms: u64,
    /// Provider-reported finish/stop reason, when available.
    pub finish_reason: Option<String>,
    /// Results the answer actually cited via `[Source N]` markers.
    pub citations: Vec<Citation>,
}
79
/// A search result that the synthesized answer explicitly referenced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    /// ID of the cited search result.
    pub source_id: String,
    /// Title of the cited document ("Untitled" when unknown).
    pub source_title: String,
    /// Stringified source path of the cited document.
    pub source_url: Option<String>,
    /// Relevance score carried over from the search result.
    pub relevance_score: f32,
    /// First ~200 characters of the cited content.
    pub excerpt: String,
}
89
/// Client that synthesizes answers from RAG search results by calling one
/// of several LLM provider HTTP APIs, with automatic fallback.
pub struct LlmClient {
    // Provider credentials and request parameters.
    pub(crate) config: LlmConfig,
    // Shared HTTP client with the configured timeout applied.
    pub(crate) http_client: reqwest::Client,
}
95
96impl LlmClient {
97 pub fn new(config: LlmConfig) -> Result<Self> {
99 let http_client = reqwest::Client::builder()
100 .timeout(std::time::Duration::from_secs(config.timeout_seconds))
101 .build()?;
102
103 Ok(Self {
104 config,
105 http_client,
106 })
107 }
108
109 pub fn is_available(&self) -> bool {
111 self.has_openai_key()
112 || self.has_anthropic_key()
113 || self.has_groq_key()
114 || self.has_openrouter_key()
115 || self.has_huggingface_key()
116 || self.config.custom_endpoint.is_some()
117 }
118
119 pub fn has_openai_key(&self) -> bool {
121 self.config
122 .openai_api_key
123 .as_ref()
124 .is_some_and(|key| !key.is_empty())
125 }
126
127 pub fn has_anthropic_key(&self) -> bool {
128 self.config
129 .anthropic_api_key
130 .as_ref()
131 .is_some_and(|key| !key.is_empty())
132 }
133
134 pub fn has_groq_key(&self) -> bool {
135 self.config
136 .groq_api_key
137 .as_ref()
138 .is_some_and(|key| !key.is_empty())
139 }
140
141 pub fn has_openrouter_key(&self) -> bool {
142 self.config
143 .openrouter_api_key
144 .as_ref()
145 .is_some_and(|key| !key.is_empty())
146 }
147
148 pub fn has_huggingface_key(&self) -> bool {
149 self.config
150 .huggingface_api_key
151 .as_ref()
152 .is_some_and(|key| !key.is_empty())
153 }
154
155 pub fn get_best_provider(&self) -> Option<LlmProvider> {
157 if self.config.preferred_provider != LlmProvider::Auto {
158 if self.is_provider_available(&self.config.preferred_provider) {
160 return Some(self.config.preferred_provider.clone());
161 }
162 }
163
164 for provider in &self.config.fallback_providers {
166 if self.is_provider_available(provider) {
167 return Some(provider.clone());
168 }
169 }
170
171 None
172 }
173
174 pub fn is_provider_available(&self, provider: &LlmProvider) -> bool {
176 match provider {
177 LlmProvider::OpenAI => self.has_openai_key(),
178 LlmProvider::Anthropic => self.has_anthropic_key(),
179 LlmProvider::Groq => self.has_groq_key(),
180 LlmProvider::OpenRouter => self.has_openrouter_key(),
181 LlmProvider::HuggingFace => self.has_huggingface_key(),
182 LlmProvider::Custom => self.config.custom_endpoint.is_some(),
183 LlmProvider::Auto => false, }
185 }
186
    /// Synthesizes an answer to `query` from `results` using the best
    /// available provider, falling back to the other configured providers
    /// when the primary attempt fails.
    ///
    /// # Errors
    /// Returns an error when no provider is available, or when the primary
    /// provider and every fallback fail.
    pub async fn synthesize_answer(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let provider = self
            .get_best_provider()
            .ok_or_else(|| anyhow!("No LLM provider available"))?;

        let start_time = std::time::Instant::now();

        // Dispatch to the provider-specific implementation.
        let response = match provider {
            LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
            LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
            LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
            LlmProvider::OpenRouter => self.synthesize_with_openrouter(query, results).await,
            LlmProvider::HuggingFace => self.synthesize_with_huggingface(query, results).await,
            LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
            // `get_best_provider` never returns `Auto`, so this arm is
            // unreachable by construction.
            LlmProvider::Auto => unreachable!(),
        };

        match response {
            Ok(mut resp) => {
                // Provider methods leave `response_time_ms` at 0; stamp it
                // here so the value covers the full round-trip.
                resp.response_time_ms = start_time.elapsed().as_millis() as u64;
                Ok(resp)
            }
            Err(e) => {
                log::warn!("Primary provider {:?} failed: {}", provider, e);
                self.try_fallback_providers(query, results, &provider).await
            }
        }
    }
221
222 async fn try_fallback_providers(
224 &self,
225 query: &str,
226 results: &[RagSearchResult],
227 failed_provider: &LlmProvider,
228 ) -> Result<LlmResponse> {
229 for provider in &self.config.fallback_providers {
230 if provider != failed_provider && self.is_provider_available(provider) {
231 log::info!("Trying fallback provider: {:?}", provider);
232
233 let start_time = std::time::Instant::now();
234 let response = match provider {
235 LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
236 LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
237 LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
238 LlmProvider::OpenRouter => {
239 self.synthesize_with_openrouter(query, results).await
240 }
241 LlmProvider::HuggingFace => {
242 self.synthesize_with_huggingface(query, results).await
243 }
244 LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
245 LlmProvider::Auto => continue,
246 };
247
248 if let Ok(mut resp) = response {
249 resp.response_time_ms = start_time.elapsed().as_millis() as u64;
250 return Ok(resp);
251 }
252 }
253 }
254
255 Err(anyhow!("All LLM providers failed"))
256 }
257
258 fn get_model_name(&self, provider: &LlmProvider) -> String {
260 if let Some(model) = &self.config.model_name {
261 return model.clone();
262 }
263
264 match provider {
265 LlmProvider::OpenAI => "gpt-4o-mini".to_string(),
266 LlmProvider::Anthropic => "claude-3-haiku-20240307".to_string(),
267 LlmProvider::Groq => "llama-3.1-8b-instant".to_string(),
268 LlmProvider::OpenRouter => "openai/gpt-3.5-turbo".to_string(),
269 LlmProvider::HuggingFace => "microsoft/DialoGPT-medium".to_string(),
270 LlmProvider::Custom => "custom-model".to_string(),
271 LlmProvider::Auto => "auto".to_string(),
272 }
273 }
274
    /// Returns the fixed system prompt instructing the model to produce
    /// concise, citation-backed answers grounded only in the supplied
    /// search results.
    fn create_system_prompt(&self) -> String {
        r#"You are a concise technical documentation assistant. Provide clear, scannable answers based ONLY on the provided search results.

RESPONSE FORMAT:
1. **Quick Answer** (1-2 sentences max)
2. **Key Points** (bullet points, max 4 items)
3. **Code Example** (if available - keep it short and practical)

RULES:
- Be extremely concise and scannable
- Use bullet points and short paragraphs
- Only include essential information
- Cite sources as [Source N]
- Never add information not in the sources
- Focus on what developers need to know immediately

STYLE:
- Write for busy developers who want quick answers
- Use clear, simple language
- Keep code examples minimal but complete
- Prioritize readability over completeness"#.to_string()
    }
298
299 fn create_user_prompt(&self, query: &str, results: &[RagSearchResult]) -> String {
301 let mut prompt = format!("Question: {}\n\nSearch Results:\n\n", query);
302
303 for (i, result) in results.iter().enumerate() {
304 prompt.push_str(&format!(
305 "[Source {}] {}\nURL: {}\nContent: {}\n\n",
306 i + 1,
307 result.title.as_ref().unwrap_or(&"Untitled".to_string()),
308 result.source_path.to_string_lossy(),
309 result.content.chars().take(1000).collect::<String>()
310 ));
311 }
312
313 prompt.push_str("\nPlease provide a comprehensive answer based on these search results.");
314 prompt
315 }
316
    /// Strips chain-of-thought preamble from a model response, returning
    /// only the user-facing answer. Handles `<thinking>`/`<think>` tag
    /// blocks and untagged "Let me think..." style prefixes.
    fn extract_final_answer(&self, response_text: &str) -> String {
        // Case 1: explicit <thinking>...</thinking> block — keep everything
        // after the first closing tag.
        if response_text.contains("<thinking>") && response_text.contains("</thinking>") {
            if let Some(thinking_end) = response_text.find("</thinking>") {
                let after_thinking = &response_text[thinking_end + "</thinking>".len()..];
                return after_thinking.trim().to_string();
            }
        }

        // Case 2: the shorter <think>...</think> variant.
        if response_text.contains("<think>") && response_text.contains("</think>") {
            if let Some(think_end) = response_text.find("</think>") {
                let after_think = &response_text[think_end + "</think>".len()..];
                return after_think.trim().to_string();
            }
        }

        // Case 3: untagged thinking prefix — scan for a transition phrase
        // marking where the real answer starts. Order matters: earlier
        // phrases win.
        if response_text.starts_with("Let me think") || response_text.starts_with("I need to think")
        {
            let transition_phrases = [
                "Here's my answer:",
                "My answer is:",
                "To answer your question:",
                "Based on the search results:",
                "The answer is:",
                "\n\n**",
                "\n\nQuick Answer:",
                "\n\n##",
            ];

            for phrase in &transition_phrases {
                if let Some(pos) = response_text.find(phrase) {
                    let answer_start = if phrase.starts_with('\n') {
                        // Markdown markers: skip only the two newlines so the
                        // marker itself ("**", "##", "Quick Answer:") stays
                        // part of the returned answer.
                        pos + 2
                    } else {
                        // Prose markers: skip the whole phrase.
                        pos + phrase.len()
                    };
                    return response_text[answer_start..].trim().to_string();
                }
            }
        }

        // No thinking markers found — return the response unchanged.
        response_text.to_string()
    }
368
369 fn extract_citations(&self, response_text: &str, results: &[RagSearchResult]) -> Vec<Citation> {
371 let mut citations = Vec::new();
372
373 for (i, result) in results.iter().enumerate() {
375 let source_ref = format!("[Source {}]", i + 1);
376 if response_text.contains(&source_ref) {
377 citations.push(Citation {
378 source_id: result.id.clone(),
379 source_title: result
380 .title
381 .clone()
382 .unwrap_or_else(|| "Untitled".to_string()),
383 source_url: Some(result.source_path.to_string_lossy().to_string()),
384 relevance_score: result.score,
385 excerpt: result.content.chars().take(200).collect(),
386 });
387 }
388 }
389
390 citations
391 }
392
393 async fn synthesize_with_openai(
395 &self,
396 query: &str,
397 results: &[RagSearchResult],
398 ) -> Result<LlmResponse> {
399 let api_key = self
400 .config
401 .openai_api_key
402 .as_ref()
403 .ok_or_else(|| anyhow!("OpenAI API key not configured"))?;
404
405 let model = self.get_model_name(&LlmProvider::OpenAI);
406 let system_prompt = self.create_system_prompt();
407 let user_prompt = self.create_user_prompt(query, results);
408
409 let payload = serde_json::json!({
410 "model": model,
411 "messages": [
412 {
413 "role": "system",
414 "content": system_prompt
415 },
416 {
417 "role": "user",
418 "content": user_prompt
419 }
420 ],
421 "max_tokens": self.config.max_tokens,
422 "temperature": self.config.temperature,
423 "stream": self.config.streaming
424 });
425
426 let response = self
427 .http_client
428 .post("https://api.openai.com/v1/chat/completions")
429 .header("Authorization", format!("Bearer {}", api_key))
430 .header("Content-Type", "application/json")
431 .json(&payload)
432 .send()
433 .await?;
434
435 if !response.status().is_success() {
436 let error_text = response.text().await?;
437 return Err(anyhow!("OpenAI API error: {}", error_text));
438 }
439
440 let response_json: serde_json::Value = response.json().await?;
441
442 let raw_answer = response_json["choices"][0]["message"]["content"]
443 .as_str()
444 .ok_or_else(|| anyhow!("Invalid OpenAI response format"))?;
445 let answer = self.extract_final_answer(raw_answer);
446
447 let usage = &response_json["usage"];
448 let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
449 let finish_reason = response_json["choices"][0]["finish_reason"]
450 .as_str()
451 .map(|s| s.to_string());
452
453 let citations = self.extract_citations(&answer, results);
454
455 Ok(LlmResponse {
456 answer,
457 sources_used: results.iter().map(|r| r.id.clone()).collect(),
458 confidence: Some(0.9), provider_used: LlmProvider::OpenAI,
460 model_used: model,
461 tokens_used,
462 response_time_ms: 0, finish_reason,
464 citations,
465 })
466 }
467
468 async fn synthesize_with_anthropic(
470 &self,
471 query: &str,
472 results: &[RagSearchResult],
473 ) -> Result<LlmResponse> {
474 let api_key = self
475 .config
476 .anthropic_api_key
477 .as_ref()
478 .ok_or_else(|| anyhow!("Anthropic API key not configured"))?;
479
480 let model = self.get_model_name(&LlmProvider::Anthropic);
481 let system_prompt = self.create_system_prompt();
482 let user_prompt = self.create_user_prompt(query, results);
483
484 let payload = serde_json::json!({
485 "model": model,
486 "max_tokens": self.config.max_tokens,
487 "temperature": self.config.temperature,
488 "system": system_prompt,
489 "messages": [
490 {
491 "role": "user",
492 "content": user_prompt
493 }
494 ]
495 });
496
497 let response = self
498 .http_client
499 .post("https://api.anthropic.com/v1/messages")
500 .header("x-api-key", api_key)
501 .header("content-type", "application/json")
502 .header("anthropic-version", "2023-06-01")
503 .json(&payload)
504 .send()
505 .await?;
506
507 if !response.status().is_success() {
508 let error_text = response.text().await?;
509 return Err(anyhow!("Anthropic API error: {}", error_text));
510 }
511
512 let response_json: serde_json::Value = response.json().await?;
513
514 let raw_answer = response_json["content"][0]["text"]
515 .as_str()
516 .ok_or_else(|| anyhow!("Invalid Anthropic response format"))?;
517 let answer = self.extract_final_answer(raw_answer);
518
519 let usage = &response_json["usage"];
520 let tokens_used = usage["output_tokens"].as_u64().map(|t| t as u32);
521 let finish_reason = response_json["stop_reason"].as_str().map(|s| s.to_string());
522
523 let citations = self.extract_citations(&answer, results);
524
525 Ok(LlmResponse {
526 answer,
527 sources_used: results.iter().map(|r| r.id.clone()).collect(),
528 confidence: Some(0.85), provider_used: LlmProvider::Anthropic,
530 model_used: model,
531 tokens_used,
532 response_time_ms: 0,
533 finish_reason,
534 citations,
535 })
536 }
537
    /// Synthesizes an answer via Groq's OpenAI-compatible chat-completions
    /// API.
    ///
    /// # Errors
    /// Fails when no API key is configured, the HTTP call fails, the API
    /// returns a non-success status, or the response JSON is malformed.
    async fn synthesize_with_groq(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .groq_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("Groq API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::Groq);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            // Hardcoded off: the response is parsed below as one JSON
            // document, which streaming (SSE) would break.
            "stream": false
        });

        let response = self
            .http_client
            .post("https://api.groq.com/openai/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            // Capture status before consuming the body.
            let status = response.status();
            let error_text = response.text().await?;
            log::error!(
                "Groq API error - Status: {}, Response: {}",
                status,
                error_text
            );
            return Err(anyhow!("Groq API error ({}): {}", status, error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid Groq response format"))?;
        // Strip any chain-of-thought preamble before citation extraction.
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            // Heuristic fixed confidence for this provider.
            confidence: Some(0.8),
            provider_used: LlmProvider::Groq,
            model_used: model,
            tokens_used,
            // Stamped by the caller after the round-trip completes.
            response_time_ms: 0,
            finish_reason,
            citations,
        })
    }
618
619 async fn synthesize_with_openrouter(
621 &self,
622 query: &str,
623 results: &[RagSearchResult],
624 ) -> Result<LlmResponse> {
625 let api_key = self
626 .config
627 .openrouter_api_key
628 .as_ref()
629 .ok_or_else(|| anyhow!("OpenRouter API key not configured"))?;
630
631 let model = self.get_model_name(&LlmProvider::OpenRouter);
632 let system_prompt = self.create_system_prompt();
633 let user_prompt = self.create_user_prompt(query, results);
634
635 let payload = serde_json::json!({
636 "model": model,
637 "messages": [
638 {
639 "role": "system",
640 "content": system_prompt
641 },
642 {
643 "role": "user",
644 "content": user_prompt
645 }
646 ],
647 "max_tokens": self.config.max_tokens,
648 "temperature": self.config.temperature,
649 "stream": self.config.streaming
650 });
651
652 let response = self
653 .http_client
654 .post("https://openrouter.ai/api/v1/chat/completions")
655 .header("Authorization", format!("Bearer {}", api_key))
656 .header("Content-Type", "application/json")
657 .header("HTTP-Referer", "https://github.com/neur0map/manx")
658 .header("X-Title", "Manx Documentation Finder")
659 .json(&payload)
660 .send()
661 .await?;
662
663 if !response.status().is_success() {
664 let error_text = response.text().await?;
665 return Err(anyhow!("OpenRouter API error: {}", error_text));
666 }
667
668 let response_json: serde_json::Value = response.json().await?;
669
670 let raw_answer = response_json["choices"][0]["message"]["content"]
671 .as_str()
672 .ok_or_else(|| anyhow!("Invalid OpenRouter response format"))?;
673 let answer = self.extract_final_answer(raw_answer);
674
675 let usage = &response_json["usage"];
676 let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
677 let finish_reason = response_json["choices"][0]["finish_reason"]
678 .as_str()
679 .map(|s| s.to_string());
680
681 let citations = self.extract_citations(&answer, results);
682
683 Ok(LlmResponse {
684 answer,
685 sources_used: results.iter().map(|r| r.id.clone()).collect(),
686 confidence: Some(0.82), provider_used: LlmProvider::OpenRouter,
688 model_used: model,
689 tokens_used,
690 response_time_ms: 0,
691 finish_reason,
692 citations,
693 })
694 }
695
696 async fn synthesize_with_huggingface(
698 &self,
699 query: &str,
700 results: &[RagSearchResult],
701 ) -> Result<LlmResponse> {
702 let api_key = self
703 .config
704 .huggingface_api_key
705 .as_ref()
706 .ok_or_else(|| anyhow!("HuggingFace API key not configured"))?;
707
708 let model = self.get_model_name(&LlmProvider::HuggingFace);
709 let system_prompt = self.create_system_prompt();
710 let user_prompt = self.create_user_prompt(query, results);
711
712 let payload = serde_json::json!({
714 "model": model,
715 "messages": [
716 {"role": "system", "content": system_prompt},
717 {"role": "user", "content": user_prompt}
718 ],
719 "max_tokens": self.config.max_tokens,
720 "temperature": self.config.temperature
721 });
722
723 let response = self
724 .http_client
725 .post("https://router.huggingface.co/v1/chat/completions")
726 .header("Authorization", format!("Bearer {}", api_key))
727 .header("Content-Type", "application/json")
728 .json(&payload)
729 .send()
730 .await?;
731
732 if !response.status().is_success() {
733 let error_text = response.text().await?;
734 return Err(anyhow!("HuggingFace API error: {}", error_text));
735 }
736
737 let response_json: serde_json::Value = response.json().await?;
738
739 let raw_answer = if let Some(choices) = response_json["choices"].as_array() {
740 if let Some(first_choice) = choices.first() {
741 if let Some(message) = first_choice["message"].as_object() {
742 message["content"].as_str().unwrap_or("")
743 } else {
744 return Err(anyhow!(
745 "Invalid HuggingFace response format: missing message"
746 ));
747 }
748 } else {
749 return Err(anyhow!(
750 "Invalid HuggingFace response format: empty choices"
751 ));
752 }
753 } else {
754 return Err(anyhow!(
755 "Invalid HuggingFace response format: missing choices"
756 ));
757 };
758
759 let answer = self.extract_final_answer(raw_answer);
760
761 let citations = self.extract_citations(&answer, results);
762
763 Ok(LlmResponse {
764 answer,
765 sources_used: results.iter().map(|r| r.id.clone()).collect(),
766 confidence: Some(0.75), provider_used: LlmProvider::HuggingFace,
768 model_used: model,
769 tokens_used: response_json["usage"]["total_tokens"]
770 .as_u64()
771 .map(|t| t as u32),
772 response_time_ms: 0,
773 finish_reason: response_json["choices"][0]["finish_reason"]
774 .as_str()
775 .map(|s| s.to_string()),
776 citations,
777 })
778 }
779
780 async fn synthesize_with_custom(
782 &self,
783 query: &str,
784 results: &[RagSearchResult],
785 ) -> Result<LlmResponse> {
786 let endpoint = self
787 .config
788 .custom_endpoint
789 .as_ref()
790 .ok_or_else(|| anyhow!("Custom endpoint not configured"))?;
791
792 let model = self.get_model_name(&LlmProvider::Custom);
793 let system_prompt = self.create_system_prompt();
794 let user_prompt = self.create_user_prompt(query, results);
795
796 let payload = serde_json::json!({
798 "model": model,
799 "messages": [
800 {
801 "role": "system",
802 "content": system_prompt
803 },
804 {
805 "role": "user",
806 "content": user_prompt
807 }
808 ],
809 "max_tokens": self.config.max_tokens,
810 "temperature": self.config.temperature,
811 "stream": self.config.streaming
812 });
813
814 let response = self
815 .http_client
816 .post(format!("{}/v1/chat/completions", endpoint))
817 .header("Content-Type", "application/json")
818 .json(&payload)
819 .send()
820 .await?;
821
822 if !response.status().is_success() {
823 let error_text = response.text().await?;
824 return Err(anyhow!("Custom endpoint error: {}", error_text));
825 }
826
827 let response_json: serde_json::Value = response.json().await?;
828
829 let raw_answer = response_json["choices"][0]["message"]["content"]
830 .as_str()
831 .ok_or_else(|| anyhow!("Invalid custom endpoint response format"))?;
832 let answer = self.extract_final_answer(raw_answer);
833
834 let usage = &response_json["usage"];
835 let tokens_used = usage
836 .get("total_tokens")
837 .and_then(|t| t.as_u64())
838 .map(|t| t as u32);
839 let finish_reason = response_json["choices"][0]
840 .get("finish_reason")
841 .and_then(|r| r.as_str())
842 .map(|s| s.to_string());
843
844 let citations = self.extract_citations(&answer, results);
845
846 Ok(LlmResponse {
847 answer,
848 sources_used: results.iter().map(|r| r.id.clone()).collect(),
849 confidence: Some(0.8), provider_used: LlmProvider::Custom,
851 model_used: model,
852 tokens_used,
853 response_time_ms: 0,
854 finish_reason,
855 citations,
856 })
857 }
858}
859
#[cfg(test)]
mod tests {
    use super::*;

    // Verifies that an explicit <thinking>...</thinking> block is removed
    // and only the text after the closing tag is kept.
    #[test]
    fn test_extract_final_answer_with_thinking_tags() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let response_with_thinking = r#"<thinking>
Let me analyze this query about Rust error handling.

The user is asking about Result types and how to handle errors properly.
I should explain the basics of Result<T, E> and common patterns.
</thinking>

**Quick Answer**
Rust uses `Result<T, E>` for error handling, where `T` is the success type and `E` is the error type.

**Key Points**
- Use `?` operator for error propagation
- `unwrap()` panics on error, avoid in production
- `expect()` provides custom panic message
- Pattern match with `match` for comprehensive handling"#;

        let extracted = client.extract_final_answer(response_with_thinking);

        assert!(!extracted.contains("<thinking>"));
        assert!(!extracted.contains("</thinking>"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("Result<T, E>"));
    }

    // Verifies that the shorter <think>...</think> variant is also stripped.
    #[test]
    fn test_extract_final_answer_with_think_tags() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let response_with_think = r#"<think>
This question is about JavaScript async/await patterns.

The user wants to understand how to handle asynchronous operations.
I should provide clear examples and best practices.
</think>

**Quick Answer**
Use `async/await` for handling asynchronous operations in JavaScript.

**Key Points**
- `async` functions return Promises
- `await` pauses execution until Promise resolves
- Use try/catch for error handling
- Avoid callback hell with Promise chains"#;

        let extracted = client.extract_final_answer(response_with_think);

        assert!(!extracted.contains("<think>"));
        assert!(!extracted.contains("</think>"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("async/await"));
    }

    // A response with no thinking markers must pass through unchanged.
    #[test]
    fn test_extract_final_answer_without_thinking() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let normal_response = r#"**Quick Answer**
This is a normal response without thinking tags.

**Key Points**
- Point 1
- Point 2"#;

        let extracted = client.extract_final_answer(normal_response);

        assert_eq!(extracted, normal_response);
    }

    // An untagged "Let me think..." prefix should be cut at the first
    // transition phrase ("Based on the search results:" here).
    #[test]
    fn test_extract_final_answer_with_thinking_prefix() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let response_with_prefix = r#"Let me think about this question carefully...

I need to consider the different aspects of the query.

Based on the search results:

**Quick Answer**
Here is the actual answer after thinking.

**Key Points**
- Important point 1
- Important point 2"#;

        let extracted = client.extract_final_answer(response_with_prefix);

        assert!(!extracted.contains("Let me think"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("Here is the actual answer"));
    }
}
959}