use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};

use crate::rag::RagSearchResult;

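/// Configuration for LLM-backed answer synthesis: per-provider API keys, an
/// optional custom OpenAI-compatible endpoint, provider selection and
/// fallback order, and generation parameters (timeout, token limit,
/// temperature, model override, streaming).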
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmConfig {
    pub openai_api_key: Option<String>,
    pub anthropic_api_key: Option<String>,
    pub groq_api_key: Option<String>,
    pub openrouter_api_key: Option<String>,
    pub huggingface_api_key: Option<String>,
    pub custom_endpoint: Option<String>,
    pub preferred_provider: LlmProvider,
    pub fallback_providers: Vec<LlmProvider>,
    pub timeout_seconds: u64,
    pub max_tokens: u32,
    pub temperature: f32,
    pub model_name: Option<String>,
    pub streaming: bool,
}

impl Default for LlmConfig {
    fn default() -> Self {
        Self {
            openai_api_key: None,
            anthropic_api_key: None,
            groq_api_key: None,
            openrouter_api_key: None,
            huggingface_api_key: None,
            custom_endpoint: None,
            preferred_provider: LlmProvider::Auto,
            fallback_providers: vec![
                LlmProvider::OpenAI,
                LlmProvider::Anthropic,
                LlmProvider::Groq,
                LlmProvider::OpenRouter,
            ],
            timeout_seconds: 30,
            max_tokens: 1000,
            temperature: 0.1,
            model_name: None,
            streaming: false,
        }
    }
}

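/// Hosted LLM backends the client can talk to. `Auto` defers the choice to
/// `LlmClient::get_best_provider`, which picks the first available provider.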
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum LlmProvider {
    Auto,
    OpenAI,
    Anthropic,
    Groq,
    OpenRouter,
    HuggingFace,
    Custom,
}

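/// A synthesized answer plus metadata about how it was produced: provider,
/// model, token usage, latency, and the citations extracted from the text.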
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
    pub answer: String,
    pub sources_used: Vec<String>,
    pub confidence: Option<f32>,
    pub provider_used: LlmProvider,
    pub model_used: String,
    pub tokens_used: Option<u32>,
    pub response_time_ms: u64,
    pub finish_reason: Option<String>,
    pub citations: Vec<Citation>,
}

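/// A single cited search result referenced as `[Source N]` in the answer.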
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    pub source_id: String,
    pub source_title: String,
    pub source_url: Option<String>,
    pub relevance_score: f32,
    pub excerpt: String,
}

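/// Client that turns RAG search results into a cited answer by calling one of
/// the configured LLM providers, with automatic fallback when a call fails.
///
/// Illustrative usage (a sketch, not a doc-test; assumes `results` were
/// produced by the RAG search layer):
///
/// ```ignore
/// let client = LlmClient::new(LlmConfig::default())?;
/// if client.is_available() {
///     let response = client.synthesize_answer("How do I configure caching?", &results).await?;
///     println!("{} (via {:?})", response.answer, response.provider_used);
/// }
/// ```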
pub struct LlmClient {
    pub(crate) config: LlmConfig,
    pub(crate) http_client: reqwest::Client,
}

impl LlmClient {
    pub fn new(config: LlmConfig) -> Result<Self> {
        let http_client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(config.timeout_seconds))
            .build()?;

        Ok(Self {
            config,
            http_client,
        })
    }

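    /// Returns `true` if at least one provider has a non-empty API key or a
    /// custom endpoint is configured.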
    pub fn is_available(&self) -> bool {
        self.has_openai_key()
            || self.has_anthropic_key()
            || self.has_groq_key()
            || self.has_openrouter_key()
            || self.has_huggingface_key()
            || self.config.custom_endpoint.is_some()
    }

    pub fn has_openai_key(&self) -> bool {
        self.config
            .openai_api_key
            .as_ref()
            .is_some_and(|key| !key.is_empty())
    }

    pub fn has_anthropic_key(&self) -> bool {
        self.config
            .anthropic_api_key
            .as_ref()
            .is_some_and(|key| !key.is_empty())
    }

    pub fn has_groq_key(&self) -> bool {
        self.config
            .groq_api_key
            .as_ref()
            .is_some_and(|key| !key.is_empty())
    }

    pub fn has_openrouter_key(&self) -> bool {
        self.config
            .openrouter_api_key
            .as_ref()
            .is_some_and(|key| !key.is_empty())
    }

    pub fn has_huggingface_key(&self) -> bool {
        self.config
            .huggingface_api_key
            .as_ref()
            .is_some_and(|key| !key.is_empty())
    }

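    /// Picks the provider to use: the preferred provider if it is set and
    /// available, otherwise the first available entry in `fallback_providers`,
    /// or `None` if nothing is usable.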
    pub fn get_best_provider(&self) -> Option<LlmProvider> {
        if self.config.preferred_provider != LlmProvider::Auto {
            if self.is_provider_available(&self.config.preferred_provider) {
                return Some(self.config.preferred_provider.clone());
            }
        }

        for provider in &self.config.fallback_providers {
            if self.is_provider_available(provider) {
                return Some(provider.clone());
            }
        }

        None
    }

    pub fn is_provider_available(&self, provider: &LlmProvider) -> bool {
        match provider {
            LlmProvider::OpenAI => self.has_openai_key(),
            LlmProvider::Anthropic => self.has_anthropic_key(),
            LlmProvider::Groq => self.has_groq_key(),
            LlmProvider::OpenRouter => self.has_openrouter_key(),
            LlmProvider::HuggingFace => self.has_huggingface_key(),
            LlmProvider::Custom => self.config.custom_endpoint.is_some(),
            LlmProvider::Auto => false, // `Auto` is resolved by get_best_provider(), never used directly
        }
    }

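    /// Synthesizes an answer to `query` from the given search results using the
    /// best available provider, falling back to the remaining providers if the
    /// first call fails. The returned `response_time_ms` covers the successful
    /// call only.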
    pub async fn synthesize_answer(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let provider = self
            .get_best_provider()
            .ok_or_else(|| anyhow!("No LLM provider available"))?;

        let start_time = std::time::Instant::now();

        let response = match provider {
            LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
            LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
            LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
            LlmProvider::OpenRouter => self.synthesize_with_openrouter(query, results).await,
            LlmProvider::HuggingFace => self.synthesize_with_huggingface(query, results).await,
            LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
            LlmProvider::Auto => unreachable!(), // get_best_provider() never returns Auto
        };

        match response {
            Ok(mut resp) => {
                resp.response_time_ms = start_time.elapsed().as_millis() as u64;
                Ok(resp)
            }
            Err(e) => {
                log::warn!("Primary provider {:?} failed: {}", provider, e);
                self.try_fallback_providers(query, results, &provider).await
            }
        }
    }

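    /// Retries the request against each configured fallback provider (skipping
    /// the one that already failed) and returns the first successful response.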
    async fn try_fallback_providers(
        &self,
        query: &str,
        results: &[RagSearchResult],
        failed_provider: &LlmProvider,
    ) -> Result<LlmResponse> {
        for provider in &self.config.fallback_providers {
            if provider != failed_provider && self.is_provider_available(provider) {
                log::info!("Trying fallback provider: {:?}", provider);

                let start_time = std::time::Instant::now();
                let response = match provider {
                    LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
                    LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
                    LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
                    LlmProvider::OpenRouter => {
                        self.synthesize_with_openrouter(query, results).await
                    }
                    LlmProvider::HuggingFace => {
                        self.synthesize_with_huggingface(query, results).await
                    }
                    LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
                    LlmProvider::Auto => continue,
                };

                if let Ok(mut resp) = response {
                    resp.response_time_ms = start_time.elapsed().as_millis() as u64;
                    return Ok(resp);
                }
            }
        }

        Err(anyhow!("All LLM providers failed"))
    }

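    /// Resolves the model to request: the user-configured `model_name` if set,
    /// otherwise a per-provider default.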
    fn get_model_name(&self, provider: &LlmProvider) -> String {
        if let Some(model) = &self.config.model_name {
            return model.clone();
        }

        match provider {
            LlmProvider::OpenAI => "gpt-4o-mini".to_string(),
            LlmProvider::Anthropic => "claude-3-haiku-20240307".to_string(),
            LlmProvider::Groq => "llama-3.1-8b-instant".to_string(),
            LlmProvider::OpenRouter => "openai/gpt-3.5-turbo".to_string(),
            LlmProvider::HuggingFace => "microsoft/DialoGPT-medium".to_string(),
            LlmProvider::Custom => "custom-model".to_string(),
            LlmProvider::Auto => "auto".to_string(),
        }
    }

    fn create_system_prompt(&self) -> String {
        r#"You are a concise technical documentation assistant. Provide clear, scannable answers based ONLY on the provided search results.

RESPONSE FORMAT:
1. **Quick Answer** (1-2 sentences max)
2. **Key Points** (bullet points, max 4 items)
3. **Code Example** (if available - keep it short and practical)

RULES:
- Be extremely concise and scannable
- Use bullet points and short paragraphs
- Only include essential information
- Cite sources as [Source N]
- Never add information not in the sources
- Focus on what developers need to know immediately

STYLE:
- Write for busy developers who want quick answers
- Use clear, simple language
- Keep code examples minimal but complete
- Prioritize readability over completeness"#.to_string()
    }

    fn create_user_prompt(&self, query: &str, results: &[RagSearchResult]) -> String {
        let mut prompt = format!("Question: {}\n\nSearch Results:\n\n", query);

        for (i, result) in results.iter().enumerate() {
            prompt.push_str(&format!(
                "[Source {}] {}\nURL: {}\nContent: {}\n\n",
                i + 1,
                result.title.as_ref().unwrap_or(&"Untitled".to_string()),
                result.source_path.to_string_lossy(),
                result.content.chars().take(1000).collect::<String>()
            ));
        }
        prompt.push_str("\nAnswer concisely, using only these search results and citing them as [Source N].");
        prompt
    }

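    /// Builds a `Citation` for every search result whose `[Source N]` marker
    /// actually appears in the generated answer, with a 200-character excerpt.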
    fn extract_citations(&self, response_text: &str, results: &[RagSearchResult]) -> Vec<Citation> {
        let mut citations = Vec::new();

        for (i, result) in results.iter().enumerate() {
            let source_ref = format!("[Source {}]", i + 1);
            if response_text.contains(&source_ref) {
                citations.push(Citation {
                    source_id: result.id.clone(),
                    source_title: result
                        .title
                        .clone()
                        .unwrap_or_else(|| "Untitled".to_string()),
                    source_url: Some(result.source_path.to_string_lossy().to_string()),
                    relevance_score: result.score,
                    excerpt: result.content.chars().take(200).collect(),
                });
            }
        }

        citations
    }

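    /// Calls the OpenAI Chat Completions API with the shared system/user
    /// prompts and maps the response into an `LlmResponse`.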
    async fn synthesize_with_openai(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .openai_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("OpenAI API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::OpenAI);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": self.config.streaming
        });

        let response = self
            .http_client
            .post("https://api.openai.com/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("OpenAI API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid OpenAI response format"))?
            .to_string();

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.9), // fixed heuristic; the API does not report confidence
            provider_used: LlmProvider::OpenAI,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // overwritten by the caller once elapsed time is known
            finish_reason,
            citations,
        })
    }

    async fn synthesize_with_anthropic(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .anthropic_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("Anthropic API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::Anthropic);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "system": system_prompt,
            "messages": [
                {
                    "role": "user",
                    "content": user_prompt
                }
            ]
        });

        let response = self
            .http_client
            .post("https://api.anthropic.com/v1/messages")
            .header("x-api-key", api_key)
            .header("content-type", "application/json")
            .header("anthropic-version", "2023-06-01")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("Anthropic API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let answer = response_json["content"][0]["text"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid Anthropic response format"))?
            .to_string();

        let usage = &response_json["usage"];
        let tokens_used = usage["output_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["stop_reason"].as_str().map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.85),
            provider_used: LlmProvider::Anthropic,
            model_used: model,
            tokens_used,
            response_time_ms: 0,
            finish_reason,
            citations,
        })
    }

    async fn synthesize_with_groq(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .groq_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("Groq API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::Groq);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": false
        });

        let response = self
            .http_client
            .post("https://api.groq.com/openai/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response.text().await?;
            log::error!(
                "Groq API error - Status: {}, Response: {}",
                status,
                error_text
            );
            return Err(anyhow!("Groq API error ({}): {}", status, error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid Groq response format"))?
            .to_string();

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.8),
            provider_used: LlmProvider::Groq,
            model_used: model,
            tokens_used,
            response_time_ms: 0,
            finish_reason,
            citations,
        })
    }

    async fn synthesize_with_openrouter(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .openrouter_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("OpenRouter API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::OpenRouter);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": self.config.streaming
        });

        let response = self
            .http_client
            .post("https://openrouter.ai/api/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .header("HTTP-Referer", "https://github.com/neur0map/manx")
            .header("X-Title", "Manx Documentation Finder")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("OpenRouter API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid OpenRouter response format"))?
            .to_string();

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.82),
            provider_used: LlmProvider::OpenRouter,
            model_used: model,
            tokens_used,
            response_time_ms: 0,
            finish_reason,
            citations,
        })
    }

    async fn synthesize_with_huggingface(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .huggingface_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("HuggingFace API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::HuggingFace);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature
        });

        let response = self
            .http_client
            .post("https://router.huggingface.co/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("HuggingFace API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let answer = if let Some(choices) = response_json["choices"].as_array() {
            if let Some(first_choice) = choices.first() {
                if let Some(message) = first_choice["message"].as_object() {
                    message["content"].as_str().unwrap_or("").to_string()
                } else {
                    return Err(anyhow!(
                        "Invalid HuggingFace response format: missing message"
                    ));
                }
            } else {
                return Err(anyhow!(
                    "Invalid HuggingFace response format: empty choices"
                ));
            }
        } else {
            return Err(anyhow!(
                "Invalid HuggingFace response format: missing choices"
            ));
        };

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.75),
            provider_used: LlmProvider::HuggingFace,
            model_used: model,
            tokens_used: response_json["usage"]["total_tokens"]
                .as_u64()
                .map(|t| t as u32),
            response_time_ms: 0,
            finish_reason: response_json["choices"][0]["finish_reason"]
                .as_str()
                .map(|s| s.to_string()),
            citations,
        })
    }

    async fn synthesize_with_custom(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let endpoint = self
            .config
            .custom_endpoint
            .as_ref()
            .ok_or_else(|| anyhow!("Custom endpoint not configured"))?;

        let model = self.get_model_name(&LlmProvider::Custom);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": self.config.streaming
        });

        let response = self
            .http_client
            .post(format!("{}/v1/chat/completions", endpoint))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("Custom endpoint error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid custom endpoint response format"))?
            .to_string();

        let usage = &response_json["usage"];
        let tokens_used = usage
            .get("total_tokens")
            .and_then(|t| t.as_u64())
            .map(|t| t as u32);
        let finish_reason = response_json["choices"][0]
            .get("finish_reason")
            .and_then(|r| r.as_str())
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.8),
            provider_used: LlmProvider::Custom,
            model_used: model,
            tokens_used,
            response_time_ms: 0,
            finish_reason,
            citations,
        })
    }
}