1use anyhow::{anyhow, Result};
7use serde::{Deserialize, Serialize};
8
9use crate::rag::RagSearchResult;
10
/// Configuration for the LLM synthesis client: per-provider API keys,
/// provider selection/fallback order, and request parameters shared by
/// all providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmConfig {
    pub openai_api_key: Option<String>,
    pub anthropic_api_key: Option<String>,
    pub groq_api_key: Option<String>,
    pub openrouter_api_key: Option<String>,
    pub huggingface_api_key: Option<String>,
    pub zai_api_key: Option<String>,
    /// Base URL of an OpenAI-compatible endpoint; `/v1/chat/completions`
    /// is appended when it is used.
    pub custom_endpoint: Option<String>,
    /// Provider tried first; `Auto` means "pick from `fallback_providers`".
    pub preferred_provider: LlmProvider,
    /// Providers tried in order when the preferred one is unavailable or fails.
    pub fallback_providers: Vec<LlmProvider>,
    /// Per-request HTTP timeout applied to the shared reqwest client.
    pub timeout_seconds: u64,
    pub max_tokens: u32,
    pub temperature: f32,
    /// Explicit model override; when `None` a per-provider default is used
    /// (see `LlmClient::get_model_name`).
    pub model_name: Option<String>,
    pub streaming: bool,
}
29
30impl Default for LlmConfig {
31 fn default() -> Self {
32 Self {
33 openai_api_key: None,
34 anthropic_api_key: None,
35 groq_api_key: None,
36 openrouter_api_key: None,
37 huggingface_api_key: None,
38 zai_api_key: None,
39 custom_endpoint: None,
40 preferred_provider: LlmProvider::Auto,
41 fallback_providers: vec![
42 LlmProvider::OpenAI,
43 LlmProvider::Anthropic,
44 LlmProvider::Groq,
45 LlmProvider::OpenRouter,
46 LlmProvider::Zai,
47 ],
48 timeout_seconds: 30,
49 max_tokens: 1000,
50 temperature: 0.1,
51 model_name: None,
52 streaming: false,
53 }
54 }
55}
56
/// Identifies an LLM backend. `Auto` is a selection strategy (pick the best
/// available provider), not a concrete backend itself.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum LlmProvider {
    Auto,
    OpenAI,
    Anthropic,
    Groq,
    OpenRouter,
    HuggingFace,
    Zai,
    /// User-supplied OpenAI-compatible endpoint (`LlmConfig::custom_endpoint`).
    Custom,
}
69
/// Result of a synthesis call: the final answer plus provenance metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
    pub answer: String,
    /// IDs of every search result passed to the provider (not only cited ones).
    pub sources_used: Vec<String>,
    /// Fixed per-provider heuristic value, not derived from the API response.
    pub confidence: Option<f32>,
    pub provider_used: LlmProvider,
    pub model_used: String,
    /// Token usage reported by the provider, when available.
    pub tokens_used: Option<u32>,
    /// Wall-clock latency, stamped by the caller after the request completes.
    pub response_time_ms: u64,
    pub finish_reason: Option<String>,
    /// Citations for the "[Source N]" markers that appear in `answer`.
    pub citations: Vec<Citation>,
}
83
/// One cited search result referenced by the synthesized answer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Citation {
    pub source_id: String,
    pub source_title: String,
    /// Stringified source path (not necessarily a URL).
    pub source_url: Option<String>,
    pub relevance_score: f32,
    /// First 200 characters of the source content.
    pub excerpt: String,
}
93
/// Client that synthesizes answers from RAG search results using whichever
/// configured LLM provider is available. Cheap to clone (shares the reqwest
/// connection pool).
#[derive(Clone)]
pub struct LlmClient {
    pub(crate) config: LlmConfig,
    pub(crate) http_client: reqwest::Client,
}
100
101impl LlmClient {
102 pub fn new(config: LlmConfig) -> Result<Self> {
104 let http_client = reqwest::Client::builder()
105 .timeout(std::time::Duration::from_secs(config.timeout_seconds))
106 .build()?;
107
108 Ok(Self {
109 config,
110 http_client,
111 })
112 }
113
114 pub fn is_available(&self) -> bool {
116 self.has_openai_key()
117 || self.has_anthropic_key()
118 || self.has_groq_key()
119 || self.has_openrouter_key()
120 || self.has_huggingface_key()
121 || self.has_zai_key()
122 || self.config.custom_endpoint.is_some()
123 }
124
125 pub fn has_openai_key(&self) -> bool {
127 self.config
128 .openai_api_key
129 .as_ref()
130 .is_some_and(|key| !key.is_empty())
131 }
132
133 pub fn has_anthropic_key(&self) -> bool {
134 self.config
135 .anthropic_api_key
136 .as_ref()
137 .is_some_and(|key| !key.is_empty())
138 }
139
140 pub fn has_groq_key(&self) -> bool {
141 self.config
142 .groq_api_key
143 .as_ref()
144 .is_some_and(|key| !key.is_empty())
145 }
146
147 pub fn has_openrouter_key(&self) -> bool {
148 self.config
149 .openrouter_api_key
150 .as_ref()
151 .is_some_and(|key| !key.is_empty())
152 }
153
154 pub fn has_huggingface_key(&self) -> bool {
155 self.config
156 .huggingface_api_key
157 .as_ref()
158 .is_some_and(|key| !key.is_empty())
159 }
160
161 pub fn has_zai_key(&self) -> bool {
162 self.config
163 .zai_api_key
164 .as_ref()
165 .is_some_and(|key| !key.is_empty())
166 }
167
168 pub fn get_best_provider(&self) -> Option<LlmProvider> {
170 if self.config.preferred_provider != LlmProvider::Auto {
171 if self.is_provider_available(&self.config.preferred_provider) {
173 return Some(self.config.preferred_provider.clone());
174 }
175 }
176
177 for provider in &self.config.fallback_providers {
179 if self.is_provider_available(provider) {
180 return Some(provider.clone());
181 }
182 }
183
184 None
185 }
186
187 pub fn is_provider_available(&self, provider: &LlmProvider) -> bool {
189 match provider {
190 LlmProvider::OpenAI => self.has_openai_key(),
191 LlmProvider::Anthropic => self.has_anthropic_key(),
192 LlmProvider::Groq => self.has_groq_key(),
193 LlmProvider::OpenRouter => self.has_openrouter_key(),
194 LlmProvider::HuggingFace => self.has_huggingface_key(),
195 LlmProvider::Zai => self.has_zai_key(),
196 LlmProvider::Custom => self.config.custom_endpoint.is_some(),
197 LlmProvider::Auto => false, }
199 }
200
    /// Synthesizes an answer for `query` grounded in `results`.
    ///
    /// Dispatches to the best available provider; on failure, logs a warning
    /// and walks the configured fallback providers. On success the measured
    /// wall-clock latency is stamped onto `response_time_ms`.
    ///
    /// # Errors
    /// Fails when no provider is configured, or when the primary provider and
    /// every fallback provider fail.
    pub async fn synthesize_answer(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let provider = self
            .get_best_provider()
            .ok_or_else(|| anyhow!("No LLM provider available"))?;

        let start_time = std::time::Instant::now();

        let response = match provider {
            LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
            LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
            LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
            LlmProvider::OpenRouter => self.synthesize_with_openrouter(query, results).await,
            LlmProvider::HuggingFace => self.synthesize_with_huggingface(query, results).await,
            LlmProvider::Zai => self.synthesize_with_zai(query, results).await,
            LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
            // get_best_provider never returns Auto (it is a strategy, not a backend).
            LlmProvider::Auto => unreachable!(),
        };

        match response {
            Ok(mut resp) => {
                // Providers leave response_time_ms at 0; stamp the real latency here.
                resp.response_time_ms = start_time.elapsed().as_millis() as u64;
                Ok(resp)
            }
            Err(e) => {
                log::warn!("Primary provider {:?} failed: {}", provider, e);
                self.try_fallback_providers(query, results, &provider).await
            }
        }
    }
236
    /// Tries each configured fallback provider (skipping the one that already
    /// failed) and returns the first successful response, with its latency
    /// stamped on.
    ///
    /// NOTE(review): individual fallback errors are silently discarded — only
    /// the generic "All LLM providers failed" error survives; the per-provider
    /// causes are not aggregated.
    async fn try_fallback_providers(
        &self,
        query: &str,
        results: &[RagSearchResult],
        failed_provider: &LlmProvider,
    ) -> Result<LlmResponse> {
        for provider in &self.config.fallback_providers {
            if provider != failed_provider && self.is_provider_available(provider) {
                log::info!("Trying fallback provider: {:?}", provider);

                let start_time = std::time::Instant::now();
                let response = match provider {
                    LlmProvider::OpenAI => self.synthesize_with_openai(query, results).await,
                    LlmProvider::Anthropic => self.synthesize_with_anthropic(query, results).await,
                    LlmProvider::Groq => self.synthesize_with_groq(query, results).await,
                    LlmProvider::OpenRouter => {
                        self.synthesize_with_openrouter(query, results).await
                    }
                    LlmProvider::HuggingFace => {
                        self.synthesize_with_huggingface(query, results).await
                    }
                    LlmProvider::Zai => self.synthesize_with_zai(query, results).await,
                    LlmProvider::Custom => self.synthesize_with_custom(query, results).await,
                    // Auto is a strategy, not a backend — nothing to call.
                    LlmProvider::Auto => continue,
                };

                if let Ok(mut resp) = response {
                    resp.response_time_ms = start_time.elapsed().as_millis() as u64;
                    return Ok(resp);
                }
            }
        }

        Err(anyhow!("All LLM providers failed"))
    }
273
274 fn get_model_name(&self, provider: &LlmProvider) -> String {
276 if let Some(model) = &self.config.model_name {
277 return model.clone();
278 }
279
280 match provider {
281 LlmProvider::OpenAI => "gpt-4o-mini".to_string(),
282 LlmProvider::Anthropic => "claude-3-haiku-20240307".to_string(),
283 LlmProvider::Groq => "llama-3.1-8b-instant".to_string(),
284 LlmProvider::OpenRouter => "openai/gpt-3.5-turbo".to_string(),
285 LlmProvider::HuggingFace => "microsoft/DialoGPT-medium".to_string(),
286 LlmProvider::Zai => "glm-4.7".to_string(),
287 LlmProvider::Custom => "custom-model".to_string(),
288 LlmProvider::Auto => "auto".to_string(),
289 }
290 }
291
    /// System prompt shared by every provider: constrains the model to a
    /// concise, source-grounded answer format with "[Source N]" citations.
    fn create_system_prompt(&self) -> String {
        r#"You are a concise technical documentation assistant. Provide clear, scannable answers based ONLY on the provided search results.

RESPONSE FORMAT:
1. **Quick Answer** (1-2 sentences max)
2. **Key Points** (bullet points, max 4 items)
3. **Code Example** (if available - keep it short and practical)

RULES:
- Be extremely concise and scannable
- Use bullet points and short paragraphs
- Only include essential information
- Cite sources as [Source N]
- Never add information not in the sources
- Focus on what developers need to know immediately

STYLE:
- Write for busy developers who want quick answers
- Use clear, simple language
- Keep code examples minimal but complete
- Prioritize readability over completeness"#.to_string()
    }
315
316 fn create_user_prompt(&self, query: &str, results: &[RagSearchResult]) -> String {
318 let mut prompt = format!("Question: {}\n\nSearch Results:\n\n", query);
319
320 for (i, result) in results.iter().enumerate() {
321 prompt.push_str(&format!(
322 "[Source {}] {}\nURL: {}\nContent: {}\n\n",
323 i + 1,
324 result.title.as_ref().unwrap_or(&"Untitled".to_string()),
325 result.source_path.to_string_lossy(),
326 result.content.chars().take(1000).collect::<String>()
327 ));
328 }
329
330 prompt.push_str("\nPlease provide a comprehensive answer based on these search results.");
331 prompt
332 }
333
    /// Instance-method convenience wrapper around
    /// [`Self::extract_final_answer_text`].
    fn extract_final_answer(&self, response_text: &str) -> String {
        Self::extract_final_answer_text(response_text)
    }
338
339 pub(crate) fn extract_final_answer_text(response_text: &str) -> String {
341 if response_text.contains("<thinking>") && response_text.contains("</thinking>") {
343 if let Some(thinking_end) = response_text.find("</thinking>") {
345 let after_thinking = &response_text[thinking_end + "</thinking>".len()..];
346 return after_thinking.trim().to_string();
347 }
348 }
349
350 if response_text.contains("<think>") && response_text.contains("</think>") {
352 if let Some(think_end) = response_text.find("</think>") {
354 let after_think = &response_text[think_end + "</think>".len()..];
355 return after_think.trim().to_string();
356 }
357 }
358
359 if response_text.starts_with("Let me think") || response_text.starts_with("I need to think")
362 {
363 let transition_phrases = [
365 "Here's my answer:",
366 "My answer is:",
367 "To answer your question:",
368 "Based on the search results:",
369 "The answer is:",
370 "\n\n**", "\n\nQuick Answer:",
372 "\n\n##", ];
374
375 for phrase in &transition_phrases {
376 if let Some(pos) = response_text.find(phrase) {
377 let answer_start = if phrase.starts_with('\n') {
378 pos + 2 } else {
380 pos + phrase.len()
381 };
382 return response_text[answer_start..].trim().to_string();
383 }
384 }
385 }
386
387 response_text.to_string()
389 }
390
391 fn extract_citations(&self, response_text: &str, results: &[RagSearchResult]) -> Vec<Citation> {
393 let mut citations = Vec::new();
394
395 for (i, result) in results.iter().enumerate() {
397 let source_ref = format!("[Source {}]", i + 1);
398 if response_text.contains(&source_ref) {
399 citations.push(Citation {
400 source_id: result.id.clone(),
401 source_title: result
402 .title
403 .clone()
404 .unwrap_or_else(|| "Untitled".to_string()),
405 source_url: Some(result.source_path.to_string_lossy().to_string()),
406 relevance_score: result.score,
407 excerpt: result.content.chars().take(200).collect(),
408 });
409 }
410 }
411
412 citations
413 }
414
    /// Synthesizes an answer via the OpenAI chat-completions API.
    ///
    /// `response_time_ms` is left at 0 here; the caller stamps the measured
    /// elapsed time onto the response.
    async fn synthesize_with_openai(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .openai_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("OpenAI API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::OpenAI);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": self.config.streaming
        });

        let response = self
            .http_client
            .post("https://api.openai.com/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("OpenAI API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid OpenAI response format"))?;
        // Strip any chain-of-thought preamble before using the answer.
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            // Fixed heuristic confidence; not derived from the API response.
            confidence: Some(0.9),
            provider_used: LlmProvider::OpenAI,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // overwritten by the caller with measured latency
            finish_reason,
            citations,
        })
    }
489
    /// Synthesizes an answer via the Anthropic Messages API.
    ///
    /// Unlike the OpenAI-style providers, the system prompt goes in a
    /// top-level `system` field and usage/stop data live under different keys.
    async fn synthesize_with_anthropic(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .anthropic_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("Anthropic API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::Anthropic);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "system": system_prompt,
            "messages": [
                {
                    "role": "user",
                    "content": user_prompt
                }
            ]
        });

        let response = self
            .http_client
            .post("https://api.anthropic.com/v1/messages")
            .header("x-api-key", api_key)
            .header("content-type", "application/json")
            .header("anthropic-version", "2023-06-01")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("Anthropic API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["content"][0]["text"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid Anthropic response format"))?;
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        // NOTE(review): only output tokens are counted here, while the other
        // providers report total_tokens — confirm this asymmetry is intended.
        let tokens_used = usage["output_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["stop_reason"].as_str().map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.85), // fixed heuristic, not API-derived
            provider_used: LlmProvider::Anthropic,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // stamped by the caller
            finish_reason,
            citations,
        })
    }
559
    /// Synthesizes an answer via Groq's OpenAI-compatible chat-completions
    /// endpoint. Streaming is always disabled here regardless of
    /// `config.streaming`, and HTTP failures are logged with their status.
    async fn synthesize_with_groq(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .groq_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("Groq API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::Groq);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": false
        });

        let response = self
            .http_client
            .post("https://api.groq.com/openai/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            // Capture the status before consuming the body.
            let status = response.status();
            let error_text = response.text().await?;
            log::error!(
                "Groq API error - Status: {}, Response: {}",
                status,
                error_text
            );
            return Err(anyhow!("Groq API error ({}): {}", status, error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid Groq response format"))?;
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.8), // fixed heuristic, not API-derived
            provider_used: LlmProvider::Groq,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // stamped by the caller
            finish_reason,
            citations,
        })
    }
640
    /// Synthesizes an answer via the OpenRouter chat-completions API.
    /// Sends the HTTP-Referer / X-Title attribution headers OpenRouter uses
    /// for app identification.
    async fn synthesize_with_openrouter(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .openrouter_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("OpenRouter API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::OpenRouter);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": self.config.streaming
        });

        let response = self
            .http_client
            .post("https://openrouter.ai/api/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .header("HTTP-Referer", "https://github.com/neur0map/manx")
            .header("X-Title", "Manx Documentation Finder")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("OpenRouter API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid OpenRouter response format"))?;
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.82), // fixed heuristic, not API-derived
            provider_used: LlmProvider::OpenRouter,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // stamped by the caller
            finish_reason,
            citations,
        })
    }
717
    /// Synthesizes an answer via the HuggingFace router's OpenAI-compatible
    /// chat-completions endpoint.
    ///
    /// The response is unpacked step by step so each malformed shape
    /// (missing choices / empty choices / missing message) yields a distinct
    /// error message.
    async fn synthesize_with_huggingface(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .huggingface_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("HuggingFace API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::HuggingFace);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature
        });

        let response = self
            .http_client
            .post("https://router.huggingface.co/v1/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("HuggingFace API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = if let Some(choices) = response_json["choices"].as_array() {
            if let Some(first_choice) = choices.first() {
                if let Some(message) = first_choice["message"].as_object() {
                    // NOTE(review): a message whose `content` is missing or
                    // non-string silently becomes "" rather than an error.
                    message["content"].as_str().unwrap_or("")
                } else {
                    return Err(anyhow!(
                        "Invalid HuggingFace response format: missing message"
                    ));
                }
            } else {
                return Err(anyhow!(
                    "Invalid HuggingFace response format: empty choices"
                ));
            }
        } else {
            return Err(anyhow!(
                "Invalid HuggingFace response format: missing choices"
            ));
        };

        let answer = self.extract_final_answer(raw_answer);

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.75), // fixed heuristic, not API-derived
            provider_used: LlmProvider::HuggingFace,
            model_used: model,
            tokens_used: response_json["usage"]["total_tokens"]
                .as_u64()
                .map(|t| t as u32),
            response_time_ms: 0, // stamped by the caller
            finish_reason: response_json["choices"][0]["finish_reason"]
                .as_str()
                .map(|s| s.to_string()),
            citations,
        })
    }
801
    /// Synthesizes an answer via Z.AI's OpenAI-compatible coding endpoint.
    /// Streaming is always disabled for this provider.
    async fn synthesize_with_zai(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let api_key = self
            .config
            .zai_api_key
            .as_ref()
            .ok_or_else(|| anyhow!("Z.AI API key not configured"))?;

        let model = self.get_model_name(&LlmProvider::Zai);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            // Presumably Z.AI expects upper-case model ids (e.g. "GLM-4.7");
            // TODO(review): confirm against the Z.AI API docs. Note the
            // returned `model_used` keeps the original lower-case name.
            "model": model.to_uppercase(),
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": false
        });

        let response = self
            .http_client
            .post("https://api.z.ai/api/coding/paas/v4/chat/completions")
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("Z.AI API error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid Z.AI response format"))?;
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        let tokens_used = usage["total_tokens"].as_u64().map(|t| t as u32);
        let finish_reason = response_json["choices"][0]["finish_reason"]
            .as_str()
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.88), // fixed heuristic, not API-derived
            provider_used: LlmProvider::Zai,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // stamped by the caller
            finish_reason,
            citations,
        })
    }
876
    /// Synthesizes an answer via a user-configured OpenAI-compatible endpoint
    /// (`{custom_endpoint}/v1/chat/completions`). No auth header is sent;
    /// local/self-hosted servers are the presumed target — confirm if remote
    /// endpoints requiring auth must be supported.
    async fn synthesize_with_custom(
        &self,
        query: &str,
        results: &[RagSearchResult],
    ) -> Result<LlmResponse> {
        let endpoint = self
            .config
            .custom_endpoint
            .as_ref()
            .ok_or_else(|| anyhow!("Custom endpoint not configured"))?;

        let model = self.get_model_name(&LlmProvider::Custom);
        let system_prompt = self.create_system_prompt();
        let user_prompt = self.create_user_prompt(query, results);

        let payload = serde_json::json!({
            "model": model,
            "messages": [
                {
                    "role": "system",
                    "content": system_prompt
                },
                {
                    "role": "user",
                    "content": user_prompt
                }
            ],
            "max_tokens": self.config.max_tokens,
            "temperature": self.config.temperature,
            "stream": self.config.streaming
        });

        let response = self
            .http_client
            .post(format!("{}/v1/chat/completions", endpoint))
            .header("Content-Type", "application/json")
            .json(&payload)
            .send()
            .await?;

        if !response.status().is_success() {
            let error_text = response.text().await?;
            return Err(anyhow!("Custom endpoint error: {}", error_text));
        }

        let response_json: serde_json::Value = response.json().await?;

        let raw_answer = response_json["choices"][0]["message"]["content"]
            .as_str()
            .ok_or_else(|| anyhow!("Invalid custom endpoint response format"))?;
        let answer = self.extract_final_answer(raw_answer);

        let usage = &response_json["usage"];
        // Defensive `.get()` access: custom servers may omit usage fields.
        let tokens_used = usage
            .get("total_tokens")
            .and_then(|t| t.as_u64())
            .map(|t| t as u32);
        let finish_reason = response_json["choices"][0]
            .get("finish_reason")
            .and_then(|r| r.as_str())
            .map(|s| s.to_string());

        let citations = self.extract_citations(&answer, results);

        Ok(LlmResponse {
            answer,
            sources_used: results.iter().map(|r| r.id.clone()).collect(),
            confidence: Some(0.8), // fixed heuristic, not API-derived
            provider_used: LlmProvider::Custom,
            model_used: model,
            tokens_used,
            response_time_ms: 0, // stamped by the caller
            finish_reason,
            citations,
        })
    }
955}
956
#[cfg(test)]
mod tests {
    use super::*;

    // <thinking>…</thinking> blocks must be removed, keeping only the text
    // after the closing tag.
    #[test]
    fn test_extract_final_answer_with_thinking_tags() {
        let response_with_thinking = r#"<thinking>
Let me analyze this query about Rust error handling.

The user is asking about Result types and how to handle errors properly.
I should explain the basics of Result<T, E> and common patterns.
</thinking>

**Quick Answer**
Rust uses `Result<T, E>` for error handling, where `T` is the success type and `E` is the error type.

**Key Points**
- Use `?` operator for error propagation
- `unwrap()` panics on error, avoid in production
- `expect()` provides custom panic message
- Pattern match with `match` for comprehensive handling"#;

        let extracted = LlmClient::extract_final_answer_text(response_with_thinking);

        assert!(!extracted.contains("<thinking>"));
        assert!(!extracted.contains("</thinking>"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("Result<T, E>"));
    }

    // Same behavior for the shorter <think>…</think> tag variant.
    #[test]
    fn test_extract_final_answer_with_think_tags() {
        let response_with_think = r#"<think>
This question is about JavaScript async/await patterns.

The user wants to understand how to handle asynchronous operations.
I should provide clear examples and best practices.
</think>

**Quick Answer**
Use `async/await` for handling asynchronous operations in JavaScript.

**Key Points**
- `async` functions return Promises
- `await` pauses execution until Promise resolves
- Use try/catch for error handling
- Avoid callback hell with Promise chains"#;

        let extracted = LlmClient::extract_final_answer_text(response_with_think);

        assert!(!extracted.contains("<think>"));
        assert!(!extracted.contains("</think>"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("async/await"));
    }

    // A response without any reasoning markers must pass through unchanged.
    #[test]
    fn test_extract_final_answer_without_thinking() {
        let normal_response = r#"**Quick Answer**
This is a normal response without thinking tags.

**Key Points**
- Point 1
- Point 2"#;

        let extracted = LlmClient::extract_final_answer_text(normal_response);

        assert_eq!(extracted, normal_response);
    }

    // A prose "Let me think…" preamble is cut at the first recognized
    // transition phrase; only the real answer remains.
    #[test]
    fn test_extract_final_answer_with_thinking_prefix() {
        let response_with_prefix = r#"Let me think about this question carefully...

I need to consider the different aspects of the query.

Based on the search results:

**Quick Answer**
Here is the actual answer after thinking.

**Key Points**
- Important point 1
- Important point 2"#;

        let extracted = LlmClient::extract_final_answer_text(response_with_prefix);

        assert!(!extracted.contains("Let me think"));
        assert!(extracted.contains("**Quick Answer**"));
        assert!(extracted.contains("Here is the actual answer"));
    }
}