1use anyhow::Result;
7use serde_json::json;
8use std::sync::Arc;
9
10use crate::rag::{
11 llm::LlmClient,
12 query_enhancer::{EnhancedQuery, QueryIntent},
13 RagSearchResult, SmartSearchConfig,
14};
15
/// A search result annotated with verification metadata (confidence score,
/// optional explanation/context, and which strategy produced the verdict).
#[derive(Debug, Clone)]
pub struct VerifiedResult {
    /// The underlying search hit that was verified.
    pub result: RagSearchResult,
    /// Final relevance confidence; compared against
    /// `SmartSearchConfig::min_confidence_score` and used for sorting.
    pub confidence_score: f32,
    /// Human-readable reason the result was judged relevant, when available.
    #[allow(dead_code)]
    pub relevance_explanation: Option<String>,
    /// Most relevant excerpt extracted from the content, when one was found.
    #[allow(dead_code)]
    pub extracted_context: Option<String>,
    /// Strategy that produced `confidence_score`.
    #[allow(dead_code)]
    pub verification_method: VerificationMethod,
}
28
/// Strategy used to compute a result's confidence score.
#[derive(Debug, Clone)]
pub enum VerificationMethod {
    /// Relevance judged by an LLM call.
    LlmBased,
    /// Relevance estimated from keyword/intent heuristics (no LLM).
    Statistical,
    /// Keyword matching only — variant currently unused in this file.
    #[allow(dead_code)]
    Keyword,
    /// Combination of methods — variant currently unused in this file.
    #[allow(dead_code)]
    Hybrid,
}
39
/// Verifies RAG search results against a query, via an optional LLM client
/// or a statistical fallback, filtering and ranking by confidence.
pub struct ResultVerifier {
    /// Optional LLM used for verification; `None` forces the statistical path.
    llm_client: Option<Arc<LlmClient>>,
    /// Search configuration (confidence threshold, LLM-verification toggle).
    config: SmartSearchConfig,
}
45
46impl ResultVerifier {
47 pub fn new(llm_client: Option<Arc<LlmClient>>, config: SmartSearchConfig) -> Self {
49 Self { llm_client, config }
50 }
51
52 pub async fn verify_results(
54 &self,
55 query: &EnhancedQuery,
56 results: Vec<RagSearchResult>,
57 ) -> Result<Vec<VerifiedResult>> {
58 log::debug!(
59 "Verifying {} results for query: '{}'",
60 results.len(),
61 query.original
62 );
63
64 let mut verified_results = Vec::new();
65
66 for result in results {
67 let verified = self.verify_single_result(query, result).await?;
68
69 if verified.confidence_score >= self.config.min_confidence_score {
71 verified_results.push(verified);
72 } else {
73 log::debug!(
74 "Filtered out result with confidence {} < threshold {}",
75 verified.confidence_score,
76 self.config.min_confidence_score
77 );
78 }
79 }
80
81 verified_results
83 .sort_by(|a, b| b.confidence_score.partial_cmp(&a.confidence_score).unwrap());
84
85 log::debug!(
86 "Verification complete: {} results passed threshold",
87 verified_results.len()
88 );
89
90 Ok(verified_results)
91 }
92
93 async fn verify_single_result(
95 &self,
96 query: &EnhancedQuery,
97 result: RagSearchResult,
98 ) -> Result<VerifiedResult> {
99 if let Some(ref llm_client) = self.llm_client {
101 if self.config.enable_result_verification {
102 match self.verify_with_llm(query, &result, llm_client).await {
103 Ok(verified) => return Ok(verified),
104 Err(e) => {
105 log::warn!("LLM verification failed, using fallback: {}", e);
106 }
107 }
108 }
109 }
110
111 Ok(self.verify_with_fallback(query, result))
113 }
114
115 async fn verify_with_llm(
117 &self,
118 query: &EnhancedQuery,
119 result: &RagSearchResult,
120 llm_client: &LlmClient,
121 ) -> Result<VerifiedResult> {
122 let system_prompt = self.build_verification_prompt(&query.detected_intent);
123
124 let user_message = format!(
125 "Query: \"{}\"\n\nContent to verify:\n{}\n\nPlease analyze if this content actually answers or relates to the query. Respond in JSON format with:\n- 'relevant': boolean\n- 'confidence': 0.0-1.0 score\n- 'explanation': brief reason\n- 'key_context': most relevant excerpt (max 200 chars)",
126 query.original,
127 self.truncate_content(&result.content, 1000)
128 );
129
130 let response = self
132 .call_llm_for_verification(llm_client, &system_prompt, &user_message)
133 .await?;
134
135 let parsed_response: serde_json::Value =
136 serde_json::from_str(&response).unwrap_or_else(|_| {
137 json!({
138 "relevant": true,
139 "confidence": 0.5,
140 "explanation": "Unable to parse LLM response",
141 "key_context": null
142 })
143 });
144
145 let confidence = parsed_response["confidence"].as_f64().unwrap_or(0.5) as f32;
146
147 let explanation = parsed_response["explanation"]
148 .as_str()
149 .map(|s| s.to_string());
150
151 let key_context = parsed_response["key_context"]
152 .as_str()
153 .map(|s| s.to_string());
154
155 Ok(VerifiedResult {
156 result: result.clone(),
157 confidence_score: confidence,
158 relevance_explanation: explanation,
159 extracted_context: key_context,
160 verification_method: VerificationMethod::LlmBased,
161 })
162 }
163
164 fn build_verification_prompt(&self, intent: &QueryIntent) -> String {
166 match intent {
167 QueryIntent::CodeSearch { language, component_type } => {
168 format!(
169 "You are a code search verification expert. Analyze if content contains relevant {} {} code. Focus on function definitions, implementations, and usage patterns.",
170 component_type.as_deref().unwrap_or("programming"),
171 language.as_deref().unwrap_or("code")
172 )
173 },
174 QueryIntent::Documentation => {
175 "You are a documentation verification expert. Analyze if content provides explanatory information, guides, or instructional material relevant to the query.".to_string()
176 },
177 QueryIntent::Configuration => {
178 "You are a configuration verification expert. Analyze if content contains settings, environment variables, or configuration patterns relevant to the query.".to_string()
179 },
180 QueryIntent::Debugging => {
181 "You are a debugging verification expert. Analyze if content contains error solutions, troubleshooting steps, or problem resolution information.".to_string()
182 },
183 _ => {
184 "You are a relevance verification expert. Analyze if the content is relevant to the search query.".to_string()
185 }
186 }
187 }
188
189 async fn call_llm_for_verification(
191 &self,
192 _llm_client: &LlmClient,
193 _system_prompt: &str,
194 _user_message: &str,
195 ) -> Result<String> {
196 Ok(json!({
198 "relevant": true,
199 "confidence": 0.7,
200 "explanation": "Content appears relevant to query",
201 "key_context": null
202 })
203 .to_string())
204 }
205
206 fn verify_with_fallback(
208 &self,
209 query: &EnhancedQuery,
210 result: RagSearchResult,
211 ) -> VerifiedResult {
212 let mut confidence_score = result.score; let keyword_score = self.calculate_keyword_score(&query.original, &result.content);
216 confidence_score = (confidence_score + keyword_score) / 2.0;
217
218 confidence_score =
220 self.apply_intent_adjustments(confidence_score, &query.detected_intent, &result);
221
222 let variation_score = self.calculate_variation_score(query, &result.content);
224 confidence_score = (confidence_score * 0.7) + (variation_score * 0.3);
225
226 let extracted_context = self.extract_key_context(&query.original, &result.content);
228
229 VerifiedResult {
230 result,
231 confidence_score: confidence_score.clamp(0.0, 1.0),
232 relevance_explanation: Some("Statistical relevance analysis".to_string()),
233 extracted_context,
234 verification_method: VerificationMethod::Statistical,
235 }
236 }
237
238 fn calculate_keyword_score(&self, query: &str, content: &str) -> f32 {
240 let query_lower = query.to_lowercase();
241 let query_words: Vec<&str> = query_lower
242 .split_whitespace()
243 .filter(|w| w.len() > 2) .collect();
245
246 if query_words.is_empty() {
247 return 0.0;
248 }
249
250 let content_lower = content.to_lowercase();
251 let matches = query_words
252 .iter()
253 .filter(|&&word| content_lower.contains(word))
254 .count();
255
256 matches as f32 / query_words.len() as f32
257 }
258
259 fn apply_intent_adjustments(
261 &self,
262 base_score: f32,
263 intent: &QueryIntent,
264 result: &RagSearchResult,
265 ) -> f32 {
266 let mut adjusted_score = base_score;
267
268 match intent {
269 QueryIntent::CodeSearch {
270 language,
271 component_type,
272 } => {
273 if self.looks_like_code(&result.content) {
275 adjusted_score *= 1.2;
276 }
277
278 if let Some(lang) = language {
280 if result.content.to_lowercase().contains(&lang.to_lowercase()) {
281 adjusted_score *= 1.1;
282 }
283 }
284
285 if let Some(comp_type) = component_type {
287 if result
288 .content
289 .to_lowercase()
290 .contains(&comp_type.to_lowercase())
291 {
292 adjusted_score *= 1.15;
293 }
294 }
295 }
296 QueryIntent::Documentation => {
297 if result.source_path.to_string_lossy().contains("doc")
299 || result.source_path.to_string_lossy().contains("readme")
300 || result.source_path.to_string_lossy().ends_with(".md")
301 {
302 adjusted_score *= 1.1;
303 }
304 }
305 QueryIntent::Configuration => {
306 if result.source_path.to_string_lossy().contains("config")
308 || result.source_path.to_string_lossy().ends_with(".json")
309 || result.source_path.to_string_lossy().ends_with(".yaml")
310 || result.source_path.to_string_lossy().ends_with(".toml")
311 {
312 adjusted_score *= 1.2;
313 }
314 }
315 _ => {}
316 }
317
318 adjusted_score
319 }
320
321 fn looks_like_code(&self, content: &str) -> bool {
323 let code_indicators = [
324 "function", "class", "struct", "impl", "def", "fn", "public", "private", "const",
325 "let", "var", "import", "use", "include", "package", "{", "}", "(", ")", ";", "=>",
326 "->",
327 ];
328
329 let indicator_count = code_indicators
330 .iter()
331 .filter(|&&indicator| content.contains(indicator))
332 .count();
333
334 indicator_count >= 3
335 }
336
337 fn calculate_variation_score(&self, query: &EnhancedQuery, content: &str) -> f32 {
339 let mut best_score: f32 = 0.0;
340
341 for variation in &query.variations {
342 let score = self.calculate_keyword_score(&variation.query, content) * variation.weight;
343 best_score = best_score.max(score);
344 }
345
346 best_score
347 }
348
349 fn extract_key_context(&self, query: &str, content: &str) -> Option<String> {
351 let query_lower = query.to_lowercase();
352 let query_words: Vec<&str> = query_lower.split_whitespace().collect();
353
354 let sentences: Vec<&str> = content
356 .split(['.', '\n', ';'])
357 .filter(|s| !s.trim().is_empty())
358 .collect();
359
360 let mut best_sentence = "";
361 let mut best_score = 0;
362
363 for sentence in sentences {
364 let sentence_lower = sentence.to_lowercase();
365 let matches = query_words
366 .iter()
367 .filter(|&&word| sentence_lower.contains(word))
368 .count();
369
370 if matches > best_score {
371 best_score = matches;
372 best_sentence = sentence;
373 }
374 }
375
376 if best_score > 0 {
377 Some(self.truncate_content(best_sentence.trim(), 200))
378 } else {
379 None
380 }
381 }
382
383 fn truncate_content(&self, content: &str, max_length: usize) -> String {
385 if content.len() <= max_length {
386 content.to_string()
387 } else {
388 let mut truncate_at = max_length;
390 while truncate_at > 0 && !content.is_char_boundary(truncate_at) {
391 truncate_at -= 1;
392 }
393 format!("{}...", &content[..truncate_at])
394 }
395 }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rag::{DocumentMetadata, SourceType};
    use std::path::PathBuf;

    /// Convenience fixture: a `RagSearchResult` wrapping `content` with the
    /// given retrieval `score`.
    #[allow(dead_code)]
    fn create_test_result(content: &str, score: f32) -> RagSearchResult {
        let metadata = DocumentMetadata {
            file_type: "rust".to_string(),
            size: 100,
            modified: chrono::Utc::now(),
            tags: vec![],
            language: Some("rust".to_string()),
        };

        RagSearchResult {
            id: "test".to_string(),
            content: content.to_string(),
            source_path: PathBuf::from("test.rs"),
            source_type: SourceType::Local,
            title: None,
            section: None,
            score,
            chunk_index: 0,
            metadata,
        }
    }

    #[tokio::test]
    async fn test_keyword_scoring() {
        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());

        let content = "fn validate_security() { /* security validation */ }";
        let score = verifier.calculate_keyword_score("validate security function", content);

        // Most query words appear in the snippet, so the overlap is high.
        assert!(score > 0.5);
    }

    #[tokio::test]
    async fn test_code_detection() {
        let verifier = ResultVerifier::new(None, SmartSearchConfig::default());

        let code_sample = "fn main() { println!(\"hello\"); }";
        let prose_sample = "This is just regular text content.";

        assert!(verifier.looks_like_code(code_sample));
        assert!(!verifier.looks_like_code(prose_sample));
    }
}