1use anyhow::{anyhow, Result};
7use serde::{Deserialize, Serialize};
8
9use crate::rag::RagSearchResult;
10
/// Configuration for the LLM synthesis layer: API credentials, provider
/// selection/fallback order, and request parameters shared by all providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmConfig {
    /// OpenAI API key (`None` or empty string means "not configured").
    pub openai_api_key: Option<String>,
    /// Anthropic API key.
    pub anthropic_api_key: Option<String>,
    /// Groq API key.
    pub groq_api_key: Option<String>,
    /// OpenRouter API key.
    pub openrouter_api_key: Option<String>,
    /// HuggingFace API key.
    pub huggingface_api_key: Option<String>,
    /// Base URL of an OpenAI-compatible custom endpoint; the
    /// `/v1/chat/completions` path is appended at request time.
    pub custom_endpoint: Option<String>,
    /// Provider to try first; `Auto` means "pick the first available fallback".
    pub preferred_provider: LlmProvider,
    /// Providers tried, in order, when the preferred one is unavailable or fails.
    pub fallback_providers: Vec<LlmProvider>,
    /// HTTP request timeout applied to the shared `reqwest` client.
    pub timeout_seconds: u64,
    /// `max_tokens` forwarded to the provider API.
    pub max_tokens: u32,
    /// Sampling temperature forwarded to the provider API.
    pub temperature: f32,
    /// Explicit model override; when `None`, a per-provider default is used.
    pub model_name: Option<String>,
    /// Request streaming responses where the provider payload supports it.
    pub streaming: bool,
}
28
29impl Default for LlmConfig {
30 fn default() -> Self {
31 Self {
32 openai_api_key: None,
33 anthropic_api_key: None,
34 groq_api_key: None,
35 openrouter_api_key: None,
36 huggingface_api_key: None,
37 custom_endpoint: None,
38 preferred_provider: LlmProvider::Auto,
39 fallback_providers: vec![
40 LlmProvider::OpenAI,
41 LlmProvider::Anthropic,
42 LlmProvider::Groq,
43 LlmProvider::OpenRouter,
44 ],
45 timeout_seconds: 30,
46 max_tokens: 1000,
47 temperature: 0.1,
48 model_name: None,
49 streaming: false,
50 }
51 }
52}
53
/// Identifies a backing LLM service. `Auto` is a pseudo-provider meaning
/// "choose the first available provider"; it is never dispatched directly.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum LlmProvider {
    /// Pick automatically from the configured fallback list.
    Auto,
    OpenAI,
    Anthropic,
    Groq,
    OpenRouter,
    HuggingFace,
    /// A user-supplied OpenAI-compatible endpoint (`LlmConfig::custom_endpoint`).
    Custom,
}
65
/// A synthesized answer plus metadata about how it was produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
    /// Final answer text (thinking tags / preambles already stripped).
    pub answer: String,
    /// IDs of every search result that was passed to the model.
    pub sources_used: Vec<String>,
    /// Heuristic, per-provider confidence value (not model-reported).
    pub confidence: Option<f32>,
    /// Provider that actually produced the answer (after any fallback).
    pub provider_used: LlmProvider,
    /// Model identifier that was sent to the provider.
    pub model_used: String,
    /// Token usage as reported by the provider, when available.
    pub tokens_used: Option<u32>,
    /// Wall-clock time of the successful request, in milliseconds
    /// (filled in by the caller of the per-provider methods).
    pub response_time_ms: u64,
    /// Provider-reported finish/stop reason, when available.
    pub finish_reason: Option<String>,
    /// Sources the answer actually cited as `[Source N]`.
    pub citations: Vec<Citation>,
}
79
/// A single source the model cited in its answer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    /// ID of the originating `RagSearchResult`.
    pub source_id: String,
    /// Result title, or "Untitled" when the result had none.
    pub source_title: String,
    /// Source path rendered as a (lossy UTF-8) string.
    pub source_url: Option<String>,
    /// Relevance score copied from the search result.
    pub relevance_score: f32,
    /// Leading excerpt of the source content (first 200 characters).
    pub excerpt: String,
}
89
/// HTTP client that synthesizes answers from RAG search results by calling
/// whichever configured LLM provider is available.
#[derive(Clone)]
pub struct LlmClient {
    pub(crate) config: LlmConfig,
    // Shared reqwest client; carries the configured request timeout.
    pub(crate) http_client: reqwest::Client,
}
96
97impl LlmClient {
98 pub fn new(config: LlmConfig) -> Result<Self> {
100 let http_client = reqwest::Client::builder()
101 .timeout(std::time::Duration::from_secs(config.timeout_seconds))
102 .build()?;
103
104 Ok(Self {
105 config,
106 http_client,
107 })
108 }
109
110 pub fn is_available(&self) -> bool {
112 self.has_openai_key()
113 || self.has_anthropic_key()
114 || self.has_groq_key()
115 || self.has_openrouter_key()
116 || self.has_huggingface_key()
117 || self.config.custom_endpoint.is_some()
118 }
119
120 pub fn has_openai_key(&self) -> bool {
122 self.config
123 .openai_api_key
124 .as_ref()
125 .is_some_and(|key| !key.is_empty())
126 }
127
128 pub fn has_anthropic_key(&self) -> bool {
129 self.config
130 .anthropic_api_key
131 .as_ref()
132 .is_some_and(|key| !key.is_empty())
133 }
134
135 pub fn has_groq_key(&self) -> bool {
136 self.config
137 .groq_api_key
138 .as_ref()
139 .is_some_and(|key| !key.is_empty())
140 }
141
142 pub fn has_openrouter_key(&self) -> bool {
143 self.config
144 .openrouter_api_key
145 .as_ref()
146 .is_some_and(|key| !key.is_empty())
147 }
148
149 pub fn has_huggingface_key(&self) -> bool {
150 self.config
151 .huggingface_api_key
152 .as_ref()
153 .is_some_and(|key| !key.is_empty())
154 }
155
156 pub fn get_best_provider(&self) -> Option<LlmProvider> {
158 if self.config.preferred_provider != LlmProvider::Auto {
159 if self.is_provider_available(&self.config.preferred_provider) {
161 return Some(self.config.preferred_provider.clone());
162 }
163 }
164
165 for provider in &self.config.fallback_providers {
167 if self.is_provider_available(provider) {
168 return Some(provider.clone());
169 }
170 }
171
172 None
173 }
174
175 pub fn is_provider_available(&self, provider: &LlmProvider) -> bool {
177 match provider {
178 LlmProvider::OpenAI => self.has_openai_key(),
179 LlmProvider::Anthropic => self.has_anthropic_key(),
180 LlmProvider::Groq => self.has_groq_key(),
181 LlmProvider::OpenRouter => self.has_openrouter_key(),
182 LlmProvider::HuggingFace => self.has_huggingface_key(),
183 LlmProvider::Custom => self.config.custom_endpoint.is_some(),
184 LlmProvider::Auto => false, }
186 }
187
188 pub async fn synthesize_answer(
190 &self,
191 query: &str,
192 results: &[RagSearchResult],
193 ) -> Result<LlmResponse> {
194 let provider = self
195 .get_best_provider()
196 .ok_or_else(|| anyhow!("No LLM provider available"))?;
197
198 let start_time = std::time::Instant::now();
199
200 let response = match provider {
201 LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
202 LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
203 LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
204 LlmProvider::OpenRouter => self.synthesize_with_openrouter(query, results).await,
205 LlmProvider::HuggingFace => self.synthesize_with_huggingface(query, results).await,
206 LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
207 LlmProvider::Auto => unreachable!(),
208 };
209
210 match response {
212 Ok(mut resp) => {
213 resp.response_time_ms = start_time.elapsed().as_millis() as u64;
214 Ok(resp)
215 }
216 Err(e) => {
217 log::warn!("Primary provider {:?} failed: {}", provider, e);
218 self.try_fallback_providers(query, results, &provider).await
219 }
220 }
221 }
222
223 async fn try_fallback_providers(
225 &self,
226 query: &str,
227 results: &[RagSearchResult],
228 failed_provider: &LlmProvider,
229 ) -> Result<LlmResponse> {
230 for provider in &self.config.fallback_providers {
231 if provider != failed_provider && self.is_provider_available(provider) {
232 log::info!("Trying fallback provider: {:?}", provider);
233
234 let start_time = std::time::Instant::now();
235 let response = match provider {
236 LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
237 LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
238 LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
239 LlmProvider::OpenRouter => {
240 self.synthesize_with_openrouter(query, results).await
241 }
242 LlmProvider::HuggingFace => {
243 self.synthesize_with_huggingface(query, results).await
244 }
245 LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
246 LlmProvider::Auto => continue,
247 };
248
249 if let Ok(mut resp) = response {
250 resp.response_time_ms = start_time.elapsed().as_millis() as u64;
251 return Ok(resp);
252 }
253 }
254 }
255
256 Err(anyhow!("All LLM providers failed"))
257 }
258
259 fn get_model_name(&self, provider: &LlmProvider) -> String {
261 if let Some(model) = &self.config.model_name {
262 return model.clone();
263 }
264
265 match provider {
266 LlmProvider::OpenAI => "gpt-4o-mini".to_string(),
267 LlmProvider::Anthropic => "claude-3-haiku-20240307".to_string(),
268 LlmProvider::Groq => "llama-3.1-8b-instant".to_string(),
269 LlmProvider::OpenRouter => "openai/gpt-3.5-turbo".to_string(),
270 LlmProvider::HuggingFace => "microsoft/DialoGPT-medium".to_string(),
271 LlmProvider::Custom => "custom-model".to_string(),
272 LlmProvider::Auto => "auto".to_string(),
273 }
274 }
275
    /// System prompt shared by every provider.
    ///
    /// The prompt text is a runtime constant: the `[Source N]` citation
    /// convention it mandates is what `extract_citations` parses, so the two
    /// must stay in sync.
    fn create_system_prompt(&self) -> String {
        r#"You are a concise technical documentation assistant. Provide clear, scannable answers based ONLY on the provided search results.

RESPONSE FORMAT:
1. **Quick Answer** (1-2 sentences max)
2. **Key Points** (bullet points, max 4 items)
3. **Code Example** (if available - keep it short and practical)

RULES:
- Be extremely concise and scannable
- Use bullet points and short paragraphs
- Only include essential information
- Cite sources as [Source N]
- Never add information not in the sources
- Focus on what developers need to know immediately

STYLE:
- Write for busy developers who want quick answers
- Use clear, simple language
- Keep code examples minimal but complete
- Prioritize readability over completeness"#.to_string()
    }
299
300 fn create_user_prompt(&self, query: &str, results: &[RagSearchResult]) -> String {
302 let mut prompt = format!("Question: {}\n\nSearch Results:\n\n", query);
303
304 for (i, result) in results.iter().enumerate() {
305 prompt.push_str(&format!(
306 "[Source {}] {}\nURL: {}\nContent: {}\n\n",
307 i + 1,
308 result.title.as_ref().unwrap_or(&"Untitled".to_string()),
309 result.source_path.to_string_lossy(),
310 result.content.chars().take(1000).collect::<String>()
311 ));
312 }
313
314 prompt.push_str("\nPlease provide a comprehensive answer based on these search results.");
315 prompt
316 }
317
    /// Strip chain-of-thought preambles from a model response, returning
    /// only the user-facing answer.
    ///
    /// Handles, in order: `<thinking>…</thinking>` blocks, `<think>…</think>`
    /// blocks, and free-form "Let me think…" prefixes followed by a known
    /// transition phrase. Falls through to the unmodified text when no
    /// pattern matches.
    fn extract_final_answer(&self, response_text: &str) -> String {
        // Case 1: explicit <thinking> tags — keep everything after the FIRST
        // closing tag.
        if response_text.contains("<thinking>") && response_text.contains("</thinking>") {
            if let Some(thinking_end) = response_text.find("</thinking>") {
                let after_thinking = &response_text[thinking_end + "</thinking>".len()..];
                return after_thinking.trim().to_string();
            }
        }

        // Case 2: the shorter <think> variant, same treatment.
        if response_text.contains("<think>") && response_text.contains("</think>") {
            if let Some(think_end) = response_text.find("</think>") {
                let after_think = &response_text[think_end + "</think>".len()..];
                return after_think.trim().to_string();
            }
        }

        // Case 3: untagged rambling that opens with a thinking phrase.
        if response_text.starts_with("Let me think") || response_text.starts_with("I need to think")
        {
            // Ordered list: the first phrase found wins, so explicit
            // transitions come before generic markdown markers.
            let transition_phrases = [
                "Here's my answer:",
                "My answer is:",
                "To answer your question:",
                "Based on the search results:",
                "The answer is:",
                "\n\n**",
                "\n\nQuick Answer:",
                "\n\n##",
            ];

            for phrase in &transition_phrases {
                if let Some(pos) = response_text.find(phrase) {
                    let answer_start = if phrase.starts_with('\n') {
                        // Keep the markdown marker itself ("**", "##", …);
                        // skip only the two leading newline characters.
                        pos + 2
                    } else {
                        pos + phrase.len()
                    };
                    return response_text[answer_start..].trim().to_string();
                }
            }
        }

        // No thinking pattern detected — return the response unchanged.
        response_text.to_string()
    }
369
370 fn extract_citations(&self, response_text: &str, results: &[RagSearchResult]) -> Vec<Citation> {
372 let mut citations = Vec::new();
373
374 for (i, result) in results.iter().enumerate() {
376 let source_ref = format!("[Source {}]", i + 1);
377 if response_text.contains(&source_ref) {
378 citations.push(Citation {
379 source_id: result.id.clone(),
380 source_title: result
381 .title
382 .clone()
383 .unwrap_or_else(|| "Untitled".to_string()),
384 source_url: Some(result.source_path.to_string_lossy().to_string()),
385 relevance_score: result.score,
386 excerpt: result.content.chars().take(200).collect(),
387 });
388 }
389 }
390
391 citations
392 }
393
394 async fn synthesize_with_openai(
396 &self,
397 query: &str,
398 results: &[RagSearchResult],
399 ) -> Result<LlmResponse> {
400 let api_key = self
401 .config
402 .openai_api_key
403 .as_ref()
404 .ok_or_else(|| anyhow!("OpenAI API key not configured"))?;
405
406 let model = self.get_model_name(&LlmProvider::OpenAI);
407 let system_prompt = self.create_system_prompt();
408 let user_prompt = self.create_user_prompt(query, results);
409
410 let payload = serde_json::json!({
411 "model": model,
412 "messages": [
413 {
414 "role": "system",
415 "content": system_prompt
416 },
417 {
418 "role": "user",
419 "content": user_prompt
420 }
421 ],
422 "max_tokens": self.config.max_tokens,
423 "temperature": self.config.temperature,
424 "stream": self.config.streaming
425 });
426
427 let response = self
428 .http_client
429 .post("https://api.openai.com/v1/chat/completions")
430 .header("Authorization", format!("Bearer {}", api_key))
431 .header("Content-Type", "application/json")
432 .json(&payload)
433 .send()
434 .await?;
435
436 if !response.status().is_success() {
437 let error_text = response.text().await?;
438 return Err(anyhow!("OpenAI API error: {}", error_text));
439 }
440
441 let response_json: serde_json::Value = response.json().await?;
442
443 let raw_answer = response_json["choices"][0]["message"]["content"]
444 .as_str()
445 .ok_or_else(|| anyhow!("Invalid OpenAI response format"))?;
446 let answer = self.extract_final_answer(raw_answer);
447
448 let usage = &response_json["usage"];
449 let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
450 let finish_reason = response_json["choices"][0]["finish_reason"]
451 .as_str()
452 .map(|s| s.to_string());
453
454 let citations = self.extract_citations(&answer, results);
455
456 Ok(LlmResponse {
457 answer,
458 sources_used: results.iter().map(|r| r.id.clone()).collect(),
459 confidence: Some(0.9), provider_used: LlmProvider::OpenAI,
461 model_used: model,
462 tokens_used,
463 response_time_ms: 0, finish_reason,
465 citations,
466 })
467 }
468
469 async fn synthesize_with_anthropic(
471 &self,
472 query: &str,
473 results: &[RagSearchResult],
474 ) -> Result<LlmResponse> {
475 let api_key = self
476 .config
477 .anthropic_api_key
478 .as_ref()
479 .ok_or_else(|| anyhow!("Anthropic API key not configured"))?;
480
481 let model = self.get_model_name(&LlmProvider::Anthropic);
482 let system_prompt = self.create_system_prompt();
483 let user_prompt = self.create_user_prompt(query, results);
484
485 let payload = serde_json::json!({
486 "model": model,
487 "max_tokens": self.config.max_tokens,
488 "temperature": self.config.temperature,
489 "system": system_prompt,
490 "messages": [
491 {
492 "role": "user",
493 "content": user_prompt
494 }
495 ]
496 });
497
498 let response = self
499 .http_client
500 .post("https://api.anthropic.com/v1/messages")
501 .header("x-api-key", api_key)
502 .header("content-type", "application/json")
503 .header("anthropic-version", "2023-06-01")
504 .json(&payload)
505 .send()
506 .await?;
507
508 if !response.status().is_success() {
509 let error_text = response.text().await?;
510 return Err(anyhow!("Anthropic API error: {}", error_text));
511 }
512
513 let response_json: serde_json::Value = response.json().await?;
514
515 let raw_answer = response_json["content"][0]["text"]
516 .as_str()
517 .ok_or_else(|| anyhow!("Invalid Anthropic response format"))?;
518 let answer = self.extract_final_answer(raw_answer);
519
520 let usage = &response_json["usage"];
521 let tokens_used = usage["output_tokens"].as_u64().map(|t| t as u32);
522 let finish_reason = response_json["stop_reason"].as_str().map(|s| s.to_string());
523
524 let citations = self.extract_citations(&answer, results);
525
526 Ok(LlmResponse {
527 answer,
528 sources_used: results.iter().map(|r| r.id.clone()).collect(),
529 confidence: Some(0.85), provider_used: LlmProvider::Anthropic,
531 model_used: model,
532 tokens_used,
533 response_time_ms: 0,
534 finish_reason,
535 citations,
536 })
537 }
538
539 async fn synthesize_with_groq(
541 &self,
542 query: &str,
543 results: &[RagSearchResult],
544 ) -> Result<LlmResponse> {
545 let api_key = self
546 .config
547 .groq_api_key
548 .as_ref()
549 .ok_or_else(|| anyhow!("Groq API key not configured"))?;
550
551 let model = self.get_model_name(&LlmProvider::Groq);
552 let system_prompt = self.create_system_prompt();
553 let user_prompt = self.create_user_prompt(query, results);
554
555 let payload = serde_json::json!({
556 "model": model,
557 "messages": [
558 {
559 "role": "system",
560 "content": system_prompt
561 },
562 {
563 "role": "user",
564 "content": user_prompt
565 }
566 ],
567 "max_tokens": self.config.max_tokens,
568 "temperature": self.config.temperature,
569 "stream": false
570 });
571
572 let response = self
573 .http_client
574 .post("https://api.groq.com/openai/v1/chat/completions")
575 .header("Authorization", format!("Bearer {}", api_key))
576 .header("Content-Type", "application/json")
577 .json(&payload)
578 .send()
579 .await?;
580
581 if !response.status().is_success() {
582 let status = response.status();
583 let error_text = response.text().await?;
584 log::error!(
585 "Groq API error - Status: {}, Response: {}",
586 status,
587 error_text
588 );
589 return Err(anyhow!("Groq API error ({}): {}", status, error_text));
590 }
591
592 let response_json: serde_json::Value = response.json().await?;
593
594 let raw_answer = response_json["choices"][0]["message"]["content"]
595 .as_str()
596 .ok_or_else(|| anyhow!("Invalid Groq response format"))?;
597 let answer = self.extract_final_answer(raw_answer);
598
599 let usage = &response_json["usage"];
600 let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
601 let finish_reason = response_json["choices"][0]["finish_reason"]
602 .as_str()
603 .map(|s| s.to_string());
604
605 let citations = self.extract_citations(&answer, results);
606
607 Ok(LlmResponse {
608 answer,
609 sources_used: results.iter().map(|r| r.id.clone()).collect(),
610 confidence: Some(0.8), provider_used: LlmProvider::Groq,
612 model_used: model,
613 tokens_used,
614 response_time_ms: 0,
615 finish_reason,
616 citations,
617 })
618 }
619
620 async fn synthesize_with_openrouter(
622 &self,
623 query: &str,
624 results: &[RagSearchResult],
625 ) -> Result<LlmResponse> {
626 let api_key = self
627 .config
628 .openrouter_api_key
629 .as_ref()
630 .ok_or_else(|| anyhow!("OpenRouter API key not configured"))?;
631
632 let model = self.get_model_name(&LlmProvider::OpenRouter);
633 let system_prompt = self.create_system_prompt();
634 let user_prompt = self.create_user_prompt(query, results);
635
636 let payload = serde_json::json!({
637 "model": model,
638 "messages": [
639 {
640 "role": "system",
641 "content": system_prompt
642 },
643 {
644 "role": "user",
645 "content": user_prompt
646 }
647 ],
648 "max_tokens": self.config.max_tokens,
649 "temperature": self.config.temperature,
650 "stream": self.config.streaming
651 });
652
653 let response = self
654 .http_client
655 .post("https://openrouter.ai/api/v1/chat/completions")
656 .header("Authorization", format!("Bearer {}", api_key))
657 .header("Content-Type", "application/json")
658 .header("HTTP-Referer", "https://github.com/neur0map/manx")
659 .header("X-Title", "Manx Documentation Finder")
660 .json(&payload)
661 .send()
662 .await?;
663
664 if !response.status().is_success() {
665 let error_text = response.text().await?;
666 return Err(anyhow!("OpenRouter API error: {}", error_text));
667 }
668
669 let response_json: serde_json::Value = response.json().await?;
670
671 let raw_answer = response_json["choices"][0]["message"]["content"]
672 .as_str()
673 .ok_or_else(|| anyhow!("Invalid OpenRouter response format"))?;
674 let answer = self.extract_final_answer(raw_answer);
675
676 let usage = &response_json["usage"];
677 let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
678 let finish_reason = response_json["choices"][0]["finish_reason"]
679 .as_str()
680 .map(|s| s.to_string());
681
682 let citations = self.extract_citations(&answer, results);
683
684 Ok(LlmResponse {
685 answer,
686 sources_used: results.iter().map(|r| r.id.clone()).collect(),
687 confidence: Some(0.82), provider_used: LlmProvider::OpenRouter,
689 model_used: model,
690 tokens_used,
691 response_time_ms: 0,
692 finish_reason,
693 citations,
694 })
695 }
696
697 async fn synthesize_with_huggingface(
699 &self,
700 query: &str,
701 results: &[RagSearchResult],
702 ) -> Result<LlmResponse> {
703 let api_key = self
704 .config
705 .huggingface_api_key
706 .as_ref()
707 .ok_or_else(|| anyhow!("HuggingFace API key not configured"))?;
708
709 let model = self.get_model_name(&LlmProvider::HuggingFace);
710 let system_prompt = self.create_system_prompt();
711 let user_prompt = self.create_user_prompt(query, results);
712
713 let payload = serde_json::json!({
715 "model": model,
716 "messages": [
717 {"role": "system", "content": system_prompt},
718 {"role": "user", "content": user_prompt}
719 ],
720 "max_tokens": self.config.max_tokens,
721 "temperature": self.config.temperature
722 });
723
724 let response = self
725 .http_client
726 .post("https://router.huggingface.co/v1/chat/completions")
727 .header("Authorization", format!("Bearer {}", api_key))
728 .header("Content-Type", "application/json")
729 .json(&payload)
730 .send()
731 .await?;
732
733 if !response.status().is_success() {
734 let error_text = response.text().await?;
735 return Err(anyhow!("HuggingFace API error: {}", error_text));
736 }
737
738 let response_json: serde_json::Value = response.json().await?;
739
740 let raw_answer = if let Some(choices) = response_json["choices"].as_array() {
741 if let Some(first_choice) = choices.first() {
742 if let Some(message) = first_choice["message"].as_object() {
743 message["content"].as_str().unwrap_or("")
744 } else {
745 return Err(anyhow!(
746 "Invalid HuggingFace response format: missing message"
747 ));
748 }
749 } else {
750 return Err(anyhow!(
751 "Invalid HuggingFace response format: empty choices"
752 ));
753 }
754 } else {
755 return Err(anyhow!(
756 "Invalid HuggingFace response format: missing choices"
757 ));
758 };
759
760 let answer = self.extract_final_answer(raw_answer);
761
762 let citations = self.extract_citations(&answer, results);
763
764 Ok(LlmResponse {
765 answer,
766 sources_used: results.iter().map(|r| r.id.clone()).collect(),
767 confidence: Some(0.75), provider_used: LlmProvider::HuggingFace,
769 model_used: model,
770 tokens_used: response_json["usage"]["total_tokens"]
771 .as_u64()
772 .map(|t| t as u32),
773 response_time_ms: 0,
774 finish_reason: response_json["choices"][0]["finish_reason"]
775 .as_str()
776 .map(|s| s.to_string()),
777 citations,
778 })
779 }
780
781 async fn synthesize_with_custom(
783 &self,
784 query: &str,
785 results: &[RagSearchResult],
786 ) -> Result<LlmResponse> {
787 let endpoint = self
788 .config
789 .custom_endpoint
790 .as_ref()
791 .ok_or_else(|| anyhow!("Custom endpoint not configured"))?;
792
793 let model = self.get_model_name(&LlmProvider::Custom);
794 let system_prompt = self.create_system_prompt();
795 let user_prompt = self.create_user_prompt(query, results);
796
797 let payload = serde_json::json!({
799 "model": model,
800 "messages": [
801 {
802 "role": "system",
803 "content": system_prompt
804 },
805 {
806 "role": "user",
807 "content": user_prompt
808 }
809 ],
810 "max_tokens": self.config.max_tokens,
811 "temperature": self.config.temperature,
812 "stream": self.config.streaming
813 });
814
815 let response = self
816 .http_client
817 .post(format!("{}/v1/chat/completions", endpoint))
818 .header("Content-Type", "application/json")
819 .json(&payload)
820 .send()
821 .await?;
822
823 if !response.status().is_success() {
824 let error_text = response.text().await?;
825 return Err(anyhow!("Custom endpoint error: {}", error_text));
826 }
827
828 let response_json: serde_json::Value = response.json().await?;
829
830 let raw_answer = response_json["choices"][0]["message"]["content"]
831 .as_str()
832 .ok_or_else(|| anyhow!("Invalid custom endpoint response format"))?;
833 let answer = self.extract_final_answer(raw_answer);
834
835 let usage = &response_json["usage"];
836 let tokens_used = usage
837 .get("total_tokens")
838 .and_then(|t| t.as_u64())
839 .map(|t| t as u32);
840 let finish_reason = response_json["choices"][0]
841 .get("finish_reason")
842 .and_then(|r| r.as_str())
843 .map(|s| s.to_string());
844
845 let citations = self.extract_citations(&answer, results);
846
847 Ok(LlmResponse {
848 answer,
849 sources_used: results.iter().map(|r| r.id.clone()).collect(),
850 confidence: Some(0.8), provider_used: LlmProvider::Custom,
852 model_used: model,
853 tokens_used,
854 response_time_ms: 0,
855 finish_reason,
856 citations,
857 })
858 }
859}
860
#[cfg(test)]
mod tests {
    use super::*;

    // All tests exercise `extract_final_answer`, which strips model
    // "thinking" preambles before the answer is shown to users.
    // `LlmClient::new(LlmConfig::default())` is safe here: it only builds an
    // HTTP client, no network traffic happens.

    // `<thinking>…</thinking>` blocks are removed; only the text after the
    // closing tag survives.
    #[test]
    fn test_extract_final_answer_with_thinking_tags() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let response_with_thinking = r#"<thinking>
Let me analyze this query about Rust error handling.

The user is asking about Result types and how to handle errors properly.
I should explain the basics of Result<T, E> and common patterns.
</thinking>

**Quick Answer**
Rust uses `Result<T, E>` for error handling, where `T` is the success type and `E` is the error type.

**Key Points**
- Use `?` operator for error propagation
- `unwrap()` panics on error, avoid in production
- `expect()` provides custom panic message
- Pattern match with `match` for comprehensive handling"#;

        let extracted = client.extract_final_answer(response_with_thinking);

        assert!(!extracted.contains("<thinking>"));
        assert!(!extracted.contains("</thinking>"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("Result<T, E>"));
    }

    // The shorter `<think>…</think>` variant is handled the same way.
    #[test]
    fn test_extract_final_answer_with_think_tags() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let response_with_think = r#"<think>
This question is about JavaScript async/await patterns.

The user wants to understand how to handle asynchronous operations.
I should provide clear examples and best practices.
</think>

**Quick Answer**
Use `async/await` for handling asynchronous operations in JavaScript.

**Key Points**
- `async` functions return Promises
- `await` pauses execution until Promise resolves
- Use try/catch for error handling
- Avoid callback hell with Promise chains"#;

        let extracted = client.extract_final_answer(response_with_think);

        assert!(!extracted.contains("<think>"));
        assert!(!extracted.contains("</think>"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("async/await"));
    }

    // A response with no thinking markers must pass through unchanged.
    #[test]
    fn test_extract_final_answer_without_thinking() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let normal_response = r#"**Quick Answer**
This is a normal response without thinking tags.

**Key Points**
- Point 1
- Point 2"#;

        let extracted = client.extract_final_answer(normal_response);

        assert_eq!(extracted, normal_response);
    }

    // Untagged "Let me think…" preambles are trimmed at the first known
    // transition phrase ("Based on the search results:" here).
    #[test]
    fn test_extract_final_answer_with_thinking_prefix() {
        let client = LlmClient::new(LlmConfig::default()).unwrap();

        let response_with_prefix = r#"Let me think about this question carefully...

I need to consider the different aspects of the query.

Based on the search results:

**Quick Answer**
Here is the actual answer after thinking.

**Key Points**
- Important point 1
- Important point 2"#;

        let extracted = client.extract_final_answer(response_with_prefix);

        assert!(!extracted.contains("Let me think"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("Here is the actual answer"));
    }
}