// manx_cli/rag/result_verifier.rs
1//! Result verification system for RAG search quality assurance
2//!
3//! This module provides intelligent result verification using LLM when available,
4//! with statistical fallback methods to ensure search results are relevant.
5
6use anyhow::Result;
7use serde_json::json;
8use std::sync::Arc;
9
10use crate::rag::{
11    llm::LlmClient,
12    query_enhancer::{EnhancedQuery, QueryIntent},
13    RagSearchResult, SmartSearchConfig,
14};
15
/// Verified search result with confidence scoring
#[derive(Debug, Clone)]
pub struct VerifiedResult {
    // The underlying search result that was scored.
    pub result: RagSearchResult,
    // Final relevance confidence in [0.0, 1.0]; used for threshold
    // filtering and for sorting (highest first) in `verify_results`.
    pub confidence_score: f32,
    // Human-readable reason for the score: the LLM's explanation, or a
    // fixed "Statistical relevance analysis" string from the fallback path.
    #[allow(dead_code)]
    pub relevance_explanation: Option<String>,
    // Most relevant excerpt pulled from the content (LLM `key_context`, or
    // the best-matching sentence from the heuristic extractor), if any.
    #[allow(dead_code)]
    pub extracted_context: Option<String>,
    // Which strategy produced the score (see `VerificationMethod`).
    #[allow(dead_code)]
    pub verification_method: VerificationMethod,
}
28
/// Method used for result verification
#[derive(Debug, Clone)]
pub enum VerificationMethod {
    /// Relevance judged by an LLM (see `verify_with_llm`).
    LlmBased,
    /// Relevance estimated from embedding score + keyword/intent heuristics
    /// (see `verify_with_fallback`).
    Statistical,
    /// Pure keyword-overlap scoring. Not produced anywhere in this module yet.
    #[allow(dead_code)]
    Keyword,
    /// Combined LLM + statistical scoring. Not produced anywhere in this module yet.
    #[allow(dead_code)]
    Hybrid,
}
39
/// Result verification system
///
/// Scores RAG search results for relevance to the originating query.
/// Prefers LLM-based verification when a client is configured and enabled,
/// and falls back to statistical heuristics otherwise.
pub struct ResultVerifier {
    // Optional LLM client; `None` always forces the statistical fallback.
    llm_client: Option<Arc<LlmClient>>,
    // Tuning knobs read here: `min_confidence_score` (filter threshold) and
    // `enable_result_verification` (gates the LLM path).
    config: SmartSearchConfig,
}
45
46impl ResultVerifier {
47    /// Create a new result verifier
48    pub fn new(llm_client: Option<Arc<LlmClient>>, config: SmartSearchConfig) -> Self {
49        Self { llm_client, config }
50    }
51
52    /// Verify and score search results based on query relevance
53    pub async fn verify_results(
54        &self,
55        query: &EnhancedQuery,
56        results: Vec<RagSearchResult>,
57    ) -> Result<Vec<VerifiedResult>> {
58        log::debug!(
59            "Verifying {} results for query: '{}'",
60            results.len(),
61            query.original
62        );
63
64        let mut verified_results = Vec::new();
65
66        for result in results {
67            let verified = self.verify_single_result(query, result).await?;
68
69            // Only include results that meet the confidence threshold
70            if verified.confidence_score >= self.config.min_confidence_score {
71                verified_results.push(verified);
72            } else {
73                log::debug!(
74                    "Filtered out result with confidence {} < threshold {}",
75                    verified.confidence_score,
76                    self.config.min_confidence_score
77                );
78            }
79        }
80
81        // Sort by confidence score (highest first)
82        verified_results
83            .sort_by(|a, b| b.confidence_score.partial_cmp(&a.confidence_score).unwrap());
84
85        log::debug!(
86            "Verification complete: {} results passed threshold",
87            verified_results.len()
88        );
89
90        Ok(verified_results)
91    }
92
93    /// Verify a single search result
94    async fn verify_single_result(
95        &self,
96        query: &EnhancedQuery,
97        result: RagSearchResult,
98    ) -> Result<VerifiedResult> {
99        // Try LLM verification first if available
100        if let Some(ref llm_client) = self.llm_client {
101            if self.config.enable_result_verification {
102                match self.verify_with_llm(query, &result, llm_client).await {
103                    Ok(verified) => return Ok(verified),
104                    Err(e) => {
105                        log::warn!("LLM verification failed, using fallback: {}", e);
106                    }
107                }
108            }
109        }
110
111        // Fallback to statistical verification
112        Ok(self.verify_with_fallback(query, result))
113    }
114
115    /// Verify result using LLM
116    async fn verify_with_llm(
117        &self,
118        query: &EnhancedQuery,
119        result: &RagSearchResult,
120        llm_client: &LlmClient,
121    ) -> Result<VerifiedResult> {
122        let system_prompt = self.build_verification_prompt(&query.detected_intent);
123
124        let user_message = format!(
125            "Query: \"{}\"\n\nContent to verify:\n{}\n\nPlease analyze if this content actually answers or relates to the query. Respond in JSON format with:\n- 'relevant': boolean\n- 'confidence': 0.0-1.0 score\n- 'explanation': brief reason\n- 'key_context': most relevant excerpt (max 200 chars)",
126            query.original,
127            self.truncate_content(&result.content, 1000)
128        );
129
130        // Simplified LLM call (placeholder)
131        let response = self
132            .call_llm_for_verification(llm_client, &system_prompt, &user_message)
133            .await?;
134
135        let parsed_response: serde_json::Value =
136            serde_json::from_str(&response).unwrap_or_else(|_| {
137                json!({
138                    "relevant": true,
139                    "confidence": 0.5,
140                    "explanation": "Unable to parse LLM response",
141                    "key_context": null
142                })
143            });
144
145        let confidence = parsed_response["confidence"].as_f64().unwrap_or(0.5) as f32;
146
147        let explanation = parsed_response["explanation"]
148            .as_str()
149            .map(|s| s.to_string());
150
151        let key_context = parsed_response["key_context"]
152            .as_str()
153            .map(|s| s.to_string());
154
155        Ok(VerifiedResult {
156            result: result.clone(),
157            confidence_score: confidence,
158            relevance_explanation: explanation,
159            extracted_context: key_context,
160            verification_method: VerificationMethod::LlmBased,
161        })
162    }
163
164    /// Build verification prompt based on query intent
165    fn build_verification_prompt(&self, intent: &QueryIntent) -> String {
166        match intent {
167            QueryIntent::CodeSearch { language, component_type } => {
168                format!(
169                    "You are a code search verification expert. Analyze if content contains relevant {} {} code. Focus on function definitions, implementations, and usage patterns.",
170                    component_type.as_deref().unwrap_or("programming"),
171                    language.as_deref().unwrap_or("code")
172                )
173            },
174            QueryIntent::Documentation => {
175                "You are a documentation verification expert. Analyze if content provides explanatory information, guides, or instructional material relevant to the query.".to_string()
176            },
177            QueryIntent::Configuration => {
178                "You are a configuration verification expert. Analyze if content contains settings, environment variables, or configuration patterns relevant to the query.".to_string()
179            },
180            QueryIntent::Debugging => {
181                "You are a debugging verification expert. Analyze if content contains error solutions, troubleshooting steps, or problem resolution information.".to_string()
182            },
183            _ => {
184                "You are a relevance verification expert. Analyze if the content is relevant to the search query.".to_string()
185            }
186        }
187    }
188
189    /// Placeholder for LLM verification call
190    async fn call_llm_for_verification(
191        &self,
192        _llm_client: &LlmClient,
193        _system_prompt: &str,
194        _user_message: &str,
195    ) -> Result<String> {
196        // Placeholder implementation
197        Ok(json!({
198            "relevant": true,
199            "confidence": 0.7,
200            "explanation": "Content appears relevant to query",
201            "key_context": null
202        })
203        .to_string())
204    }
205
206    /// Fallback verification using statistical methods
207    fn verify_with_fallback(
208        &self,
209        query: &EnhancedQuery,
210        result: RagSearchResult,
211    ) -> VerifiedResult {
212        let mut confidence_score = result.score; // Start with embedding similarity
213
214        // Keyword matching boost
215        let keyword_score = self.calculate_keyword_score(&query.original, &result.content);
216        confidence_score = (confidence_score + keyword_score) / 2.0;
217
218        // Intent-specific scoring adjustments
219        confidence_score =
220            self.apply_intent_adjustments(confidence_score, &query.detected_intent, &result);
221
222        // Apply query variation matching
223        let variation_score = self.calculate_variation_score(query, &result.content);
224        confidence_score = (confidence_score * 0.7) + (variation_score * 0.3);
225
226        // Extract key context using simple heuristics
227        let extracted_context = self.extract_key_context(&query.original, &result.content);
228
229        VerifiedResult {
230            result,
231            confidence_score: confidence_score.clamp(0.0, 1.0),
232            relevance_explanation: Some("Statistical relevance analysis".to_string()),
233            extracted_context,
234            verification_method: VerificationMethod::Statistical,
235        }
236    }
237
238    /// Calculate keyword-based relevance score
239    fn calculate_keyword_score(&self, query: &str, content: &str) -> f32 {
240        let query_lower = query.to_lowercase();
241        let query_words: Vec<&str> = query_lower
242            .split_whitespace()
243            .filter(|w| w.len() > 2) // Skip very short words
244            .collect();
245
246        if query_words.is_empty() {
247            return 0.0;
248        }
249
250        let content_lower = content.to_lowercase();
251        let matches = query_words
252            .iter()
253            .filter(|&&word| content_lower.contains(word))
254            .count();
255
256        matches as f32 / query_words.len() as f32
257    }
258
259    /// Apply intent-specific scoring adjustments
260    fn apply_intent_adjustments(
261        &self,
262        base_score: f32,
263        intent: &QueryIntent,
264        result: &RagSearchResult,
265    ) -> f32 {
266        let mut adjusted_score = base_score;
267
268        match intent {
269            QueryIntent::CodeSearch {
270                language,
271                component_type,
272            } => {
273                // Boost if content appears to be code
274                if self.looks_like_code(&result.content) {
275                    adjusted_score *= 1.2;
276                }
277
278                // Boost if language matches
279                if let Some(lang) = language {
280                    if result.content.to_lowercase().contains(&lang.to_lowercase()) {
281                        adjusted_score *= 1.1;
282                    }
283                }
284
285                // Boost if component type matches
286                if let Some(comp_type) = component_type {
287                    if result
288                        .content
289                        .to_lowercase()
290                        .contains(&comp_type.to_lowercase())
291                    {
292                        adjusted_score *= 1.15;
293                    }
294                }
295            }
296            QueryIntent::Documentation => {
297                // Boost documentation-like files
298                if result.source_path.to_string_lossy().contains("doc")
299                    || result.source_path.to_string_lossy().contains("readme")
300                    || result.source_path.to_string_lossy().ends_with(".md")
301                {
302                    adjusted_score *= 1.1;
303                }
304            }
305            QueryIntent::Configuration => {
306                // Boost config-like files
307                if result.source_path.to_string_lossy().contains("config")
308                    || result.source_path.to_string_lossy().ends_with(".json")
309                    || result.source_path.to_string_lossy().ends_with(".yaml")
310                    || result.source_path.to_string_lossy().ends_with(".toml")
311                {
312                    adjusted_score *= 1.2;
313                }
314            }
315            _ => {}
316        }
317
318        adjusted_score
319    }
320
321    /// Check if content looks like code
322    fn looks_like_code(&self, content: &str) -> bool {
323        let code_indicators = [
324            "function", "class", "struct", "impl", "def", "fn", "public", "private", "const",
325            "let", "var", "import", "use", "include", "package", "{", "}", "(", ")", ";", "=>",
326            "->",
327        ];
328
329        let indicator_count = code_indicators
330            .iter()
331            .filter(|&&indicator| content.contains(indicator))
332            .count();
333
334        indicator_count >= 3
335    }
336
337    /// Calculate score based on query variations
338    fn calculate_variation_score(&self, query: &EnhancedQuery, content: &str) -> f32 {
339        let mut best_score: f32 = 0.0;
340
341        for variation in &query.variations {
342            let score = self.calculate_keyword_score(&variation.query, content) * variation.weight;
343            best_score = best_score.max(score);
344        }
345
346        best_score
347    }
348
349    /// Extract most relevant context from content
350    fn extract_key_context(&self, query: &str, content: &str) -> Option<String> {
351        let query_lower = query.to_lowercase();
352        let query_words: Vec<&str> = query_lower.split_whitespace().collect();
353
354        // Find sentences containing query terms
355        let sentences: Vec<&str> = content
356            .split(['.', '\n', ';'])
357            .filter(|s| !s.trim().is_empty())
358            .collect();
359
360        let mut best_sentence = "";
361        let mut best_score = 0;
362
363        for sentence in sentences {
364            let sentence_lower = sentence.to_lowercase();
365            let matches = query_words
366                .iter()
367                .filter(|&&word| sentence_lower.contains(word))
368                .count();
369
370            if matches > best_score {
371                best_score = matches;
372                best_sentence = sentence;
373            }
374        }
375
376        if best_score > 0 {
377            Some(self.truncate_content(best_sentence.trim(), 200))
378        } else {
379            None
380        }
381    }
382
383    /// Truncate content to specified length (Unicode-safe)
384    fn truncate_content(&self, content: &str, max_length: usize) -> String {
385        if content.len() <= max_length {
386            content.to_string()
387        } else {
388            // Use char_indices to find a valid Unicode boundary
389            let mut truncate_at = max_length;
390            while truncate_at > 0 && !content.is_char_boundary(truncate_at) {
391                truncate_at -= 1;
392            }
393            format!("{}...", &content[..truncate_at])
394        }
395    }
396}
397
398#[cfg(test)]
399mod tests {
400    use super::*;
401    use crate::rag::{DocumentMetadata, SourceType};
402    use std::path::PathBuf;
403
    // Build a minimal RagSearchResult fixture with the given content and
    // embedding score; all other fields are filled with neutral defaults.
    // Kept for future tests, hence the dead_code allowance.
    #[allow(dead_code)]
    fn create_test_result(content: &str, score: f32) -> RagSearchResult {
        RagSearchResult {
            id: "test".to_string(),
            content: content.to_string(),
            source_path: PathBuf::from("test.rs"),
            source_type: SourceType::Local,
            title: None,
            section: None,
            score,
            chunk_index: 0,
            metadata: DocumentMetadata {
                file_type: "rust".to_string(),
                size: 100,
                modified: chrono::Utc::now(),
                tags: vec![],
                language: Some("rust".to_string()),
            },
        }
    }
424
425    #[tokio::test]
426    async fn test_keyword_scoring() {
427        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());
428
429        let score = verifier.calculate_keyword_score(
430            "validate security function",
431            "fn validate_security() { /* security validation */ }",
432        );
433
434        assert!(score > 0.5);
435    }
436
437    #[tokio::test]
438    async fn test_code_detection() {
439        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());
440
441        assert!(verifier.looks_like_code("fn main() { println!(\"hello\"); }"));
442        assert!(!verifier.looks_like_code("This is just regular text content."));
443    }
444}