1use anyhow::Result;
7use serde_json::json;
8
9use crate::rag::{
10 llm::LlmClient,
11 query_enhancer::{EnhancedQuery, QueryIntent},
12 RagSearchResult, SmartSearchConfig,
13};
14
/// A search result annotated with the outcome of relevance verification.
#[derive(Debug, Clone)]
pub struct VerifiedResult {
    /// The underlying search hit that was verified.
    pub result: RagSearchResult,
    /// Final relevance confidence; results below the configured
    /// `min_confidence_score` threshold are filtered out upstream.
    pub confidence_score: f32,
    /// Human-readable reason the result was judged (ir)relevant, if available.
    #[allow(dead_code)]
    pub relevance_explanation: Option<String>,
    /// Most relevant excerpt extracted from the content, if one was found.
    #[allow(dead_code)]
    pub extracted_context: Option<String>,
    /// Which strategy produced the confidence score.
    #[allow(dead_code)]
    pub verification_method: VerificationMethod,
}
27
/// Strategy used to score a result's relevance.
#[derive(Debug, Clone)]
pub enum VerificationMethod {
    /// Scored by prompting an LLM to judge query/content relevance.
    LlmBased,
    /// Scored by keyword overlap plus intent-based heuristics (fallback path).
    Statistical,
    /// Pure keyword matching. Currently unused.
    #[allow(dead_code)]
    Keyword,
    /// Combination of methods. Currently unused.
    #[allow(dead_code)]
    Hybrid,
}
38
/// Filters and ranks RAG search results by verified relevance to a query.
pub struct ResultVerifier {
    // Optional LLM client; when absent (or verification is disabled in
    // `config`) the statistical fallback is used instead.
    llm_client: Option<LlmClient>,
    config: SmartSearchConfig,
}
44
45impl ResultVerifier {
46 pub fn new(llm_client: Option<LlmClient>, config: SmartSearchConfig) -> Self {
48 Self { llm_client, config }
49 }
50
51 pub async fn verify_results(
53 &self,
54 query: &EnhancedQuery,
55 results: Vec<RagSearchResult>,
56 ) -> Result<Vec<VerifiedResult>> {
57 log::debug!(
58 "Verifying {} results for query: '{}'",
59 results.len(),
60 query.original
61 );
62
63 let mut verified_results = Vec::new();
64
65 for result in results {
66 let verified = self.verify_single_result(query, result).await?;
67
68 if verified.confidence_score >= self.config.min_confidence_score {
70 verified_results.push(verified);
71 } else {
72 log::debug!(
73 "Filtered out result with confidence {} < threshold {}",
74 verified.confidence_score,
75 self.config.min_confidence_score
76 );
77 }
78 }
79
80 verified_results
82 .sort_by(|a, b| b.confidence_score.partial_cmp(&a.confidence_score).unwrap());
83
84 log::debug!(
85 "Verification complete: {} results passed threshold",
86 verified_results.len()
87 );
88
89 Ok(verified_results)
90 }
91
92 async fn verify_single_result(
94 &self,
95 query: &EnhancedQuery,
96 result: RagSearchResult,
97 ) -> Result<VerifiedResult> {
98 if let Some(ref llm_client) = self.llm_client {
100 if self.config.enable_result_verification {
101 match self.verify_with_llm(query, &result, llm_client).await {
102 Ok(verified) => return Ok(verified),
103 Err(e) => {
104 log::warn!("LLM verification failed, using fallback: {}", e);
105 }
106 }
107 }
108 }
109
110 Ok(self.verify_with_fallback(query, result))
112 }
113
114 async fn verify_with_llm(
116 &self,
117 query: &EnhancedQuery,
118 result: &RagSearchResult,
119 llm_client: &LlmClient,
120 ) -> Result<VerifiedResult> {
121 let system_prompt = self.build_verification_prompt(&query.detected_intent);
122
123 let user_message = format!(
124 "Query: \"{}\"\n\nContent to verify:\n{}\n\nPlease analyze if this content actually answers or relates to the query. Respond in JSON format with:\n- 'relevant': boolean\n- 'confidence': 0.0-1.0 score\n- 'explanation': brief reason\n- 'key_context': most relevant excerpt (max 200 chars)",
125 query.original,
126 self.truncate_content(&result.content, 1000)
127 );
128
129 let response = self
131 .call_llm_for_verification(llm_client, &system_prompt, &user_message)
132 .await?;
133
134 let parsed_response: serde_json::Value =
135 serde_json::from_str(&response).unwrap_or_else(|_| {
136 json!({
137 "relevant": true,
138 "confidence": 0.5,
139 "explanation": "Unable to parse LLM response",
140 "key_context": null
141 })
142 });
143
144 let confidence = parsed_response["confidence"].as_f64().unwrap_or(0.5) as f32;
145
146 let explanation = parsed_response["explanation"]
147 .as_str()
148 .map(|s| s.to_string());
149
150 let key_context = parsed_response["key_context"]
151 .as_str()
152 .map(|s| s.to_string());
153
154 Ok(VerifiedResult {
155 result: result.clone(),
156 confidence_score: confidence,
157 relevance_explanation: explanation,
158 extracted_context: key_context,
159 verification_method: VerificationMethod::LlmBased,
160 })
161 }
162
163 fn build_verification_prompt(&self, intent: &QueryIntent) -> String {
165 match intent {
166 QueryIntent::CodeSearch { language, component_type } => {
167 format!(
168 "You are a code search verification expert. Analyze if content contains relevant {} {} code. Focus on function definitions, implementations, and usage patterns.",
169 component_type.as_deref().unwrap_or("programming"),
170 language.as_deref().unwrap_or("code")
171 )
172 },
173 QueryIntent::Documentation => {
174 "You are a documentation verification expert. Analyze if content provides explanatory information, guides, or instructional material relevant to the query.".to_string()
175 },
176 QueryIntent::Configuration => {
177 "You are a configuration verification expert. Analyze if content contains settings, environment variables, or configuration patterns relevant to the query.".to_string()
178 },
179 QueryIntent::Debugging => {
180 "You are a debugging verification expert. Analyze if content contains error solutions, troubleshooting steps, or problem resolution information.".to_string()
181 },
182 _ => {
183 "You are a relevance verification expert. Analyze if the content is relevant to the search query.".to_string()
184 }
185 }
186 }
187
188 async fn call_llm_for_verification(
190 &self,
191 _llm_client: &LlmClient,
192 _system_prompt: &str,
193 _user_message: &str,
194 ) -> Result<String> {
195 Ok(json!({
197 "relevant": true,
198 "confidence": 0.7,
199 "explanation": "Content appears relevant to query",
200 "key_context": null
201 })
202 .to_string())
203 }
204
205 fn verify_with_fallback(
207 &self,
208 query: &EnhancedQuery,
209 result: RagSearchResult,
210 ) -> VerifiedResult {
211 let mut confidence_score = result.score; let keyword_score = self.calculate_keyword_score(&query.original, &result.content);
215 confidence_score = (confidence_score + keyword_score) / 2.0;
216
217 confidence_score =
219 self.apply_intent_adjustments(confidence_score, &query.detected_intent, &result);
220
221 let variation_score = self.calculate_variation_score(query, &result.content);
223 confidence_score = (confidence_score * 0.7) + (variation_score * 0.3);
224
225 let extracted_context = self.extract_key_context(&query.original, &result.content);
227
228 VerifiedResult {
229 result,
230 confidence_score: confidence_score.clamp(0.0, 1.0),
231 relevance_explanation: Some("Statistical relevance analysis".to_string()),
232 extracted_context,
233 verification_method: VerificationMethod::Statistical,
234 }
235 }
236
237 fn calculate_keyword_score(&self, query: &str, content: &str) -> f32 {
239 let query_lower = query.to_lowercase();
240 let query_words: Vec<&str> = query_lower
241 .split_whitespace()
242 .filter(|w| w.len() > 2) .collect();
244
245 if query_words.is_empty() {
246 return 0.0;
247 }
248
249 let content_lower = content.to_lowercase();
250 let matches = query_words
251 .iter()
252 .filter(|&&word| content_lower.contains(word))
253 .count();
254
255 matches as f32 / query_words.len() as f32
256 }
257
258 fn apply_intent_adjustments(
260 &self,
261 base_score: f32,
262 intent: &QueryIntent,
263 result: &RagSearchResult,
264 ) -> f32 {
265 let mut adjusted_score = base_score;
266
267 match intent {
268 QueryIntent::CodeSearch {
269 language,
270 component_type,
271 } => {
272 if self.looks_like_code(&result.content) {
274 adjusted_score *= 1.2;
275 }
276
277 if let Some(lang) = language {
279 if result.content.to_lowercase().contains(&lang.to_lowercase()) {
280 adjusted_score *= 1.1;
281 }
282 }
283
284 if let Some(comp_type) = component_type {
286 if result
287 .content
288 .to_lowercase()
289 .contains(&comp_type.to_lowercase())
290 {
291 adjusted_score *= 1.15;
292 }
293 }
294 }
295 QueryIntent::Documentation => {
296 if result.source_path.to_string_lossy().contains("doc")
298 || result.source_path.to_string_lossy().contains("readme")
299 || result.source_path.to_string_lossy().ends_with(".md")
300 {
301 adjusted_score *= 1.1;
302 }
303 }
304 QueryIntent::Configuration => {
305 if result.source_path.to_string_lossy().contains("config")
307 || result.source_path.to_string_lossy().ends_with(".json")
308 || result.source_path.to_string_lossy().ends_with(".yaml")
309 || result.source_path.to_string_lossy().ends_with(".toml")
310 {
311 adjusted_score *= 1.2;
312 }
313 }
314 _ => {}
315 }
316
317 adjusted_score
318 }
319
320 fn looks_like_code(&self, content: &str) -> bool {
322 let code_indicators = [
323 "function", "class", "struct", "impl", "def", "fn", "public", "private", "const",
324 "let", "var", "import", "use", "include", "package", "{", "}", "(", ")", ";", "=>",
325 "->",
326 ];
327
328 let indicator_count = code_indicators
329 .iter()
330 .filter(|&&indicator| content.contains(indicator))
331 .count();
332
333 indicator_count >= 3
334 }
335
336 fn calculate_variation_score(&self, query: &EnhancedQuery, content: &str) -> f32 {
338 let mut best_score: f32 = 0.0;
339
340 for variation in &query.variations {
341 let score = self.calculate_keyword_score(&variation.query, content) * variation.weight;
342 best_score = best_score.max(score);
343 }
344
345 best_score
346 }
347
348 fn extract_key_context(&self, query: &str, content: &str) -> Option<String> {
350 let query_lower = query.to_lowercase();
351 let query_words: Vec<&str> = query_lower.split_whitespace().collect();
352
353 let sentences: Vec<&str> = content
355 .split(['.', '\n', ';'])
356 .filter(|s| !s.trim().is_empty())
357 .collect();
358
359 let mut best_sentence = "";
360 let mut best_score = 0;
361
362 for sentence in sentences {
363 let sentence_lower = sentence.to_lowercase();
364 let matches = query_words
365 .iter()
366 .filter(|&&word| sentence_lower.contains(word))
367 .count();
368
369 if matches > best_score {
370 best_score = matches;
371 best_sentence = sentence;
372 }
373 }
374
375 if best_score > 0 {
376 Some(self.truncate_content(best_sentence.trim(), 200))
377 } else {
378 None
379 }
380 }
381
382 fn truncate_content(&self, content: &str, max_length: usize) -> String {
384 if content.len() <= max_length {
385 content.to_string()
386 } else {
387 let mut truncate_at = max_length;
389 while truncate_at > 0 && !content.is_char_boundary(truncate_at) {
390 truncate_at -= 1;
391 }
392 format!("{}...", &content[..truncate_at])
393 }
394 }
395}
396
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rag::{DocumentMetadata, SourceType};
    use std::path::PathBuf;

    /// Builds a minimal `RagSearchResult` for exercising verifier internals.
    #[allow(dead_code)]
    fn create_test_result(content: &str, score: f32) -> RagSearchResult {
        let metadata = DocumentMetadata {
            file_type: "rust".to_string(),
            size: 100,
            modified: chrono::Utc::now(),
            tags: Vec::new(),
            language: Some("rust".to_string()),
        };

        RagSearchResult {
            id: String::from("test"),
            content: content.to_owned(),
            source_path: PathBuf::from("test.rs"),
            source_type: SourceType::Local,
            title: None,
            section: None,
            score,
            chunk_index: 0,
            metadata,
        }
    }

    #[tokio::test]
    async fn test_keyword_scoring() {
        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());

        let query = "validate security function";
        let content = "fn validate_security() { /* security validation */ }";
        let score = verifier.calculate_keyword_score(query, content);

        // Most significant query words appear in the content.
        assert!(score > 0.5);
    }

    #[tokio::test]
    async fn test_code_detection() {
        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());

        let code_sample = "fn main() { println!(\"hello\"); }";
        let prose_sample = "This is just regular text content.";

        assert!(verifier.looks_like_code(code_sample));
        assert!(!verifier.looks_like_code(prose_sample));
    }
}