// manx_cli/rag/result_verifier.rs

//! Result verification system for RAG search quality assurance
//!
//! This module provides intelligent result verification using LLM when available,
//! with statistical fallback methods to ensure search results are relevant.
6use anyhow::Result;
7use serde_json::json;
8
9use crate::rag::{
10    llm::LlmClient,
11    query_enhancer::{EnhancedQuery, QueryIntent},
12    RagSearchResult, SmartSearchConfig,
13};
14
15/// Verified search result with confidence scoring
16#[derive(Debug, Clone)]
17pub struct VerifiedResult {
18    pub result: RagSearchResult,
19    pub confidence_score: f32,
20    #[allow(dead_code)]
21    pub relevance_explanation: Option<String>,
22    #[allow(dead_code)]
23    pub extracted_context: Option<String>,
24    #[allow(dead_code)]
25    pub verification_method: VerificationMethod,
26}
27
/// Method used for result verification.
#[derive(Debug, Clone)]
pub enum VerificationMethod {
    /// Relevance judged by an LLM call.
    LlmBased,
    /// Relevance judged by keyword/embedding heuristics (fallback path).
    Statistical,
    /// Pure keyword matching.
    #[allow(dead_code)] // reserved for future verification strategies
    Keyword,
    /// Combination of multiple strategies.
    #[allow(dead_code)] // reserved for future verification strategies
    Hybrid,
}
38
39/// Result verification system
40pub struct ResultVerifier {
41    llm_client: Option<LlmClient>,
42    config: SmartSearchConfig,
43}
44
45impl ResultVerifier {
46    /// Create a new result verifier
47    pub fn new(llm_client: Option<LlmClient>, config: SmartSearchConfig) -> Self {
48        Self { llm_client, config }
49    }
50
51    /// Verify and score search results based on query relevance
52    pub async fn verify_results(
53        &self,
54        query: &EnhancedQuery,
55        results: Vec<RagSearchResult>,
56    ) -> Result<Vec<VerifiedResult>> {
57        log::debug!(
58            "Verifying {} results for query: '{}'",
59            results.len(),
60            query.original
61        );
62
63        let mut verified_results = Vec::new();
64
65        for result in results {
66            let verified = self.verify_single_result(query, result).await?;
67
68            // Only include results that meet the confidence threshold
69            if verified.confidence_score >= self.config.min_confidence_score {
70                verified_results.push(verified);
71            } else {
72                log::debug!(
73                    "Filtered out result with confidence {} < threshold {}",
74                    verified.confidence_score,
75                    self.config.min_confidence_score
76                );
77            }
78        }
79
80        // Sort by confidence score (highest first)
81        verified_results
82            .sort_by(|a, b| b.confidence_score.partial_cmp(&a.confidence_score).unwrap());
83
84        log::debug!(
85            "Verification complete: {} results passed threshold",
86            verified_results.len()
87        );
88
89        Ok(verified_results)
90    }
91
92    /// Verify a single search result
93    async fn verify_single_result(
94        &self,
95        query: &EnhancedQuery,
96        result: RagSearchResult,
97    ) -> Result<VerifiedResult> {
98        // Try LLM verification first if available
99        if let Some(ref llm_client) = self.llm_client {
100            if self.config.enable_result_verification {
101                match self.verify_with_llm(query, &result, llm_client).await {
102                    Ok(verified) => return Ok(verified),
103                    Err(e) => {
104                        log::warn!("LLM verification failed, using fallback: {}", e);
105                    }
106                }
107            }
108        }
109
110        // Fallback to statistical verification
111        Ok(self.verify_with_fallback(query, result))
112    }
113
114    /// Verify result using LLM
115    async fn verify_with_llm(
116        &self,
117        query: &EnhancedQuery,
118        result: &RagSearchResult,
119        llm_client: &LlmClient,
120    ) -> Result<VerifiedResult> {
121        let system_prompt = self.build_verification_prompt(&query.detected_intent);
122
123        let user_message = format!(
124            "Query: \"{}\"\n\nContent to verify:\n{}\n\nPlease analyze if this content actually answers or relates to the query. Respond in JSON format with:\n- 'relevant': boolean\n- 'confidence': 0.0-1.0 score\n- 'explanation': brief reason\n- 'key_context': most relevant excerpt (max 200 chars)",
125            query.original,
126            self.truncate_content(&result.content, 1000)
127        );
128
129        // Simplified LLM call (placeholder)
130        let response = self
131            .call_llm_for_verification(llm_client, &system_prompt, &user_message)
132            .await?;
133
134        let parsed_response: serde_json::Value =
135            serde_json::from_str(&response).unwrap_or_else(|_| {
136                json!({
137                    "relevant": true,
138                    "confidence": 0.5,
139                    "explanation": "Unable to parse LLM response",
140                    "key_context": null
141                })
142            });
143
144        let confidence = parsed_response["confidence"].as_f64().unwrap_or(0.5) as f32;
145
146        let explanation = parsed_response["explanation"]
147            .as_str()
148            .map(|s| s.to_string());
149
150        let key_context = parsed_response["key_context"]
151            .as_str()
152            .map(|s| s.to_string());
153
154        Ok(VerifiedResult {
155            result: result.clone(),
156            confidence_score: confidence,
157            relevance_explanation: explanation,
158            extracted_context: key_context,
159            verification_method: VerificationMethod::LlmBased,
160        })
161    }
162
163    /// Build verification prompt based on query intent
164    fn build_verification_prompt(&self, intent: &QueryIntent) -> String {
165        match intent {
166            QueryIntent::CodeSearch { language, component_type } => {
167                format!(
168                    "You are a code search verification expert. Analyze if content contains relevant {} {} code. Focus on function definitions, implementations, and usage patterns.",
169                    component_type.as_deref().unwrap_or("programming"),
170                    language.as_deref().unwrap_or("code")
171                )
172            },
173            QueryIntent::Documentation => {
174                "You are a documentation verification expert. Analyze if content provides explanatory information, guides, or instructional material relevant to the query.".to_string()
175            },
176            QueryIntent::Configuration => {
177                "You are a configuration verification expert. Analyze if content contains settings, environment variables, or configuration patterns relevant to the query.".to_string()
178            },
179            QueryIntent::Debugging => {
180                "You are a debugging verification expert. Analyze if content contains error solutions, troubleshooting steps, or problem resolution information.".to_string()
181            },
182            _ => {
183                "You are a relevance verification expert. Analyze if the content is relevant to the search query.".to_string()
184            }
185        }
186    }
187
188    /// Placeholder for LLM verification call
189    async fn call_llm_for_verification(
190        &self,
191        _llm_client: &LlmClient,
192        _system_prompt: &str,
193        _user_message: &str,
194    ) -> Result<String> {
195        // Placeholder implementation
196        Ok(json!({
197            "relevant": true,
198            "confidence": 0.7,
199            "explanation": "Content appears relevant to query",
200            "key_context": null
201        })
202        .to_string())
203    }
204
205    /// Fallback verification using statistical methods
206    fn verify_with_fallback(
207        &self,
208        query: &EnhancedQuery,
209        result: RagSearchResult,
210    ) -> VerifiedResult {
211        let mut confidence_score = result.score; // Start with embedding similarity
212
213        // Keyword matching boost
214        let keyword_score = self.calculate_keyword_score(&query.original, &result.content);
215        confidence_score = (confidence_score + keyword_score) / 2.0;
216
217        // Intent-specific scoring adjustments
218        confidence_score =
219            self.apply_intent_adjustments(confidence_score, &query.detected_intent, &result);
220
221        // Apply query variation matching
222        let variation_score = self.calculate_variation_score(query, &result.content);
223        confidence_score = (confidence_score * 0.7) + (variation_score * 0.3);
224
225        // Extract key context using simple heuristics
226        let extracted_context = self.extract_key_context(&query.original, &result.content);
227
228        VerifiedResult {
229            result,
230            confidence_score: confidence_score.clamp(0.0, 1.0),
231            relevance_explanation: Some("Statistical relevance analysis".to_string()),
232            extracted_context,
233            verification_method: VerificationMethod::Statistical,
234        }
235    }
236
237    /// Calculate keyword-based relevance score
238    fn calculate_keyword_score(&self, query: &str, content: &str) -> f32 {
239        let query_lower = query.to_lowercase();
240        let query_words: Vec<&str> = query_lower
241            .split_whitespace()
242            .filter(|w| w.len() > 2) // Skip very short words
243            .collect();
244
245        if query_words.is_empty() {
246            return 0.0;
247        }
248
249        let content_lower = content.to_lowercase();
250        let matches = query_words
251            .iter()
252            .filter(|&&word| content_lower.contains(word))
253            .count();
254
255        matches as f32 / query_words.len() as f32
256    }
257
258    /// Apply intent-specific scoring adjustments
259    fn apply_intent_adjustments(
260        &self,
261        base_score: f32,
262        intent: &QueryIntent,
263        result: &RagSearchResult,
264    ) -> f32 {
265        let mut adjusted_score = base_score;
266
267        match intent {
268            QueryIntent::CodeSearch {
269                language,
270                component_type,
271            } => {
272                // Boost if content appears to be code
273                if self.looks_like_code(&result.content) {
274                    adjusted_score *= 1.2;
275                }
276
277                // Boost if language matches
278                if let Some(lang) = language {
279                    if result.content.to_lowercase().contains(&lang.to_lowercase()) {
280                        adjusted_score *= 1.1;
281                    }
282                }
283
284                // Boost if component type matches
285                if let Some(comp_type) = component_type {
286                    if result
287                        .content
288                        .to_lowercase()
289                        .contains(&comp_type.to_lowercase())
290                    {
291                        adjusted_score *= 1.15;
292                    }
293                }
294            }
295            QueryIntent::Documentation => {
296                // Boost documentation-like files
297                if result.source_path.to_string_lossy().contains("doc")
298                    || result.source_path.to_string_lossy().contains("readme")
299                    || result.source_path.to_string_lossy().ends_with(".md")
300                {
301                    adjusted_score *= 1.1;
302                }
303            }
304            QueryIntent::Configuration => {
305                // Boost config-like files
306                if result.source_path.to_string_lossy().contains("config")
307                    || result.source_path.to_string_lossy().ends_with(".json")
308                    || result.source_path.to_string_lossy().ends_with(".yaml")
309                    || result.source_path.to_string_lossy().ends_with(".toml")
310                {
311                    adjusted_score *= 1.2;
312                }
313            }
314            _ => {}
315        }
316
317        adjusted_score
318    }
319
320    /// Check if content looks like code
321    fn looks_like_code(&self, content: &str) -> bool {
322        let code_indicators = [
323            "function", "class", "struct", "impl", "def", "fn", "public", "private", "const",
324            "let", "var", "import", "use", "include", "package", "{", "}", "(", ")", ";", "=>",
325            "->",
326        ];
327
328        let indicator_count = code_indicators
329            .iter()
330            .filter(|&&indicator| content.contains(indicator))
331            .count();
332
333        indicator_count >= 3
334    }
335
336    /// Calculate score based on query variations
337    fn calculate_variation_score(&self, query: &EnhancedQuery, content: &str) -> f32 {
338        let mut best_score: f32 = 0.0;
339
340        for variation in &query.variations {
341            let score = self.calculate_keyword_score(&variation.query, content) * variation.weight;
342            best_score = best_score.max(score);
343        }
344
345        best_score
346    }
347
348    /// Extract most relevant context from content
349    fn extract_key_context(&self, query: &str, content: &str) -> Option<String> {
350        let query_lower = query.to_lowercase();
351        let query_words: Vec<&str> = query_lower.split_whitespace().collect();
352
353        // Find sentences containing query terms
354        let sentences: Vec<&str> = content
355            .split(['.', '\n', ';'])
356            .filter(|s| !s.trim().is_empty())
357            .collect();
358
359        let mut best_sentence = "";
360        let mut best_score = 0;
361
362        for sentence in sentences {
363            let sentence_lower = sentence.to_lowercase();
364            let matches = query_words
365                .iter()
366                .filter(|&&word| sentence_lower.contains(word))
367                .count();
368
369            if matches > best_score {
370                best_score = matches;
371                best_sentence = sentence;
372            }
373        }
374
375        if best_score > 0 {
376            Some(self.truncate_content(best_sentence.trim(), 200))
377        } else {
378            None
379        }
380    }
381
382    /// Truncate content to specified length (Unicode-safe)
383    fn truncate_content(&self, content: &str, max_length: usize) -> String {
384        if content.len() <= max_length {
385            content.to_string()
386        } else {
387            // Use char_indices to find a valid Unicode boundary
388            let mut truncate_at = max_length;
389            while truncate_at > 0 && !content.is_char_boundary(truncate_at) {
390                truncate_at -= 1;
391            }
392            format!("{}...", &content[..truncate_at])
393        }
394    }
395}
396
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rag::{DocumentMetadata, SourceType};
    use std::path::PathBuf;

    /// Build a minimal `RagSearchResult` fixture with the given content/score.
    #[allow(dead_code)] // kept for tests that construct full results
    fn create_test_result(content: &str, score: f32) -> RagSearchResult {
        RagSearchResult {
            id: "test".to_string(),
            content: content.to_string(),
            source_path: PathBuf::from("test.rs"),
            source_type: SourceType::Local,
            title: None,
            section: None,
            score,
            chunk_index: 0,
            metadata: DocumentMetadata {
                file_type: "rust".to_string(),
                size: 100,
                modified: chrono::Utc::now(),
                tags: vec![],
                language: Some("rust".to_string()),
            },
        }
    }

    #[tokio::test]
    async fn test_keyword_scoring() {
        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());

        // All three query words appear in the content, so the overlap
        // fraction must exceed 0.5.
        let score = verifier.calculate_keyword_score(
            "validate security function",
            "fn validate_security() { /* security validation */ }",
        );

        assert!(score > 0.5);
    }

    #[tokio::test]
    async fn test_code_detection() {
        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());

        assert!(verifier.looks_like_code("fn main() { println!(\"hello\"); }"));
        assert!(!verifier.looks_like_code("This is just regular text content."));
    }
}
443}