agentic_codebase/grounding/
hallucination.rs1use serde::{Deserialize, Serialize};
8
9use crate::graph::CodeGraph;
10
11use super::citation::{Citation, CitationEngine, GroundedClaim};
12use super::engine::extract_code_references;
13
/// Outcome of checking one piece of AI-generated text against the code graph.
///
/// Values of this type are assembled by `HallucinationDetector::check_output`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HallucinationCheck {
    /// The AI output that was checked, stored verbatim.
    pub ai_output: String,
    /// Every hallucination detected in the output.
    pub hallucinations: Vec<Hallucination>,
    /// Claims that the citation engine reported as fully grounded.
    pub verified_claims: Vec<GroundedClaim>,
    /// Number of hallucinations divided by the number of claims that contained
    /// code references, capped at 1.0; 0.0 when no claim contained a reference.
    pub hallucination_score: f64,
    /// `true` when the hallucination score is below 0.3.
    pub safe_to_use: bool,
}
30
/// A single claim in AI output that contradicts the code graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Hallucination {
    /// The claim (one sentence of the AI output) that was flagged.
    pub claim: String,
    /// Category of the mismatch.
    pub hallucination_type: HallucinationType,
    /// Human-readable description of what the codebase actually contains.
    pub reality: String,
    /// Citations backing `reality`; empty when the symbol does not exist at all.
    pub evidence: Vec<Citation>,
    /// How serious the mismatch is.
    pub severity: HallucinationSeverity,
}
45
/// Category of a detected hallucination.
///
/// NOTE(review): only `NonExistent` and `WrongLocation` are produced by the
/// detector code in this file; the descriptions of the remaining variants are
/// inferred from their names and should be confirmed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum HallucinationType {
    /// The referenced symbol was not found in the code graph.
    NonExistent,
    /// Presumably: the symbol exists but does not behave as claimed.
    WrongBehavior,
    /// Presumably: the symbol exists but its signature differs from the claim.
    WrongSignature,
    /// The symbol exists, but the claim names a file path that does not match
    /// the file the symbol is recorded in.
    WrongLocation,
    /// Presumably: the claim describes a previous state of the code.
    Outdated,
    /// Presumably: the claim describes a capability the codebase does not have.
    InventedFeature,
}
62
/// Severity of a hallucination.
///
/// The derived ordering follows declaration order, so
/// `Minor < Moderate < Severe < Critical`; do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum HallucinationSeverity {
    /// Lowest severity.
    Minor,
    /// Assigned by the detector to wrong-location findings.
    Moderate,
    /// Assigned by the detector to references to non-existent symbols.
    Severe,
    /// Highest severity.
    Critical,
}
75
/// Detects hallucinated code references in AI output by checking each claim
/// against a [`CodeGraph`].
///
/// The detector borrows the graph for `'g`.
pub struct HallucinationDetector<'g> {
    /// Citation engine built over the same graph; used to ground claims and
    /// to cite nodes as evidence.
    citation_engine: CitationEngine<'g>,
    /// The graph whose units are searched when classifying references.
    graph: &'g CodeGraph,
}
83
84impl<'g> HallucinationDetector<'g> {
85 pub fn new(graph: &'g CodeGraph) -> Self {
87 Self {
88 citation_engine: CitationEngine::new(graph),
89 graph,
90 }
91 }
92
93 pub fn check_output(&self, ai_output: &str) -> HallucinationCheck {
95 let sentences = split_into_claims(ai_output);
96 let mut hallucinations = Vec::new();
97 let mut verified_claims = Vec::new();
98 let mut total_claims = 0usize;
99
100 for sentence in &sentences {
101 let refs = extract_code_references(sentence);
102 if refs.is_empty() {
103 continue; }
105 total_claims += 1;
106
107 let grounded = self.citation_engine.ground_claim(sentence);
108 if grounded.fully_grounded {
109 verified_claims.push(grounded);
110 } else {
111 for reference in &refs {
113 if let Some(hallucination) = self.classify_hallucination(sentence, reference) {
114 hallucinations.push(hallucination);
115 }
116 }
117 }
118 }
119
120 let hallucination_score = if total_claims == 0 {
121 0.0
122 } else {
123 hallucinations.len() as f64 / total_claims as f64
124 };
125
126 HallucinationCheck {
127 ai_output: ai_output.to_string(),
128 hallucinations,
129 verified_claims,
130 hallucination_score: hallucination_score.min(1.0),
131 safe_to_use: hallucination_score < 0.3,
132 }
133 }
134
135 pub fn suggest_fixes(&self, check: &HallucinationCheck) -> Vec<String> {
137 let mut fixes = Vec::new();
138
139 for h in &check.hallucinations {
140 match h.hallucination_type {
141 HallucinationType::NonExistent => {
142 let refs = extract_code_references(&h.claim);
144 for r in &refs {
145 let similar = self.find_similar_names(r);
146 if !similar.is_empty() {
147 fixes.push(format!(
148 "Replace '{}' with one of: {}",
149 r,
150 similar.join(", ")
151 ));
152 } else {
153 fixes.push(format!("Remove reference to non-existent '{}'", r));
154 }
155 }
156 }
157 HallucinationType::WrongLocation => {
158 if !h.evidence.is_empty() {
159 fixes.push(format!(
160 "Correct location: actually in {}",
161 h.evidence[0].location.file
162 ));
163 }
164 }
165 HallucinationType::WrongSignature => {
166 if !h.evidence.is_empty() {
167 fixes.push(format!("Correct signature: {}", h.evidence[0].code_snippet));
168 }
169 }
170 _ => {
171 fixes.push(format!("Review claim: {}", h.claim));
172 }
173 }
174 }
175
176 fixes
177 }
178
179 fn classify_hallucination(&self, sentence: &str, reference: &str) -> Option<Hallucination> {
182 let mut found = false;
184 let mut found_unit = None;
185 for unit in self.graph.units() {
186 if unit.name == reference {
187 found = true;
188 found_unit = Some(unit);
189 break;
190 }
191 }
192
193 if !found {
194 let lower = reference.to_lowercase();
196 for unit in self.graph.units() {
197 if unit.name.to_lowercase() == lower {
198 found = true;
199 found_unit = Some(unit);
200 break;
201 }
202 }
203 }
204
205 if !found {
206 return Some(Hallucination {
208 claim: sentence.to_string(),
209 hallucination_type: HallucinationType::NonExistent,
210 reality: format!("No symbol '{}' exists in the codebase", reference),
211 evidence: Vec::new(),
212 severity: HallucinationSeverity::Severe,
213 });
214 }
215
216 if let Some(unit) = found_unit {
218 let sentence_lower = sentence.to_lowercase();
219
220 if sentence_lower.contains("in ") || sentence_lower.contains("file") {
222 let file_str = unit.file_path.display().to_string();
223 let words: Vec<&str> = sentence.split_whitespace().collect();
225 for word in &words {
226 let w = word.trim_matches(|c: char| {
227 !c.is_alphanumeric() && c != '/' && c != '.' && c != '_'
228 });
229 if w.contains('/') && w.contains('.') && !file_str.contains(w) {
230 let citation = self.citation_engine.cite_node(unit.id);
231 return Some(Hallucination {
232 claim: sentence.to_string(),
233 hallucination_type: HallucinationType::WrongLocation,
234 reality: format!("'{}' is in {}", reference, file_str),
235 evidence: citation.into_iter().collect(),
236 severity: HallucinationSeverity::Moderate,
237 });
238 }
239 }
240 }
241 }
242
243 None
244 }
245
246 fn find_similar_names(&self, name: &str) -> Vec<String> {
247 let lower = name.to_lowercase();
248 let mut results: Vec<(String, usize)> = Vec::new();
249
250 for unit in self.graph.units() {
251 let u_lower = unit.name.to_lowercase();
252 if (u_lower.starts_with(&lower) || lower.starts_with(&u_lower))
253 && !results.iter().any(|(n, _)| *n == unit.name)
254 {
255 results.push((unit.name.clone(), 0));
256 }
257 }
258
259 results.sort_by_key(|(_, d)| *d);
260 results.into_iter().take(5).map(|(n, _)| n).collect()
261 }
262}
263
/// Splits free text into individual claims (roughly, sentences).
///
/// A claim ends at a newline or at a period that is followed by whitespace or
/// by the end of the text. Periods inside a token, as in `src/lib.rs`,
/// `module.function` or `1.5`, do not end a claim, so file paths and
/// qualified names reach the caller intact. Each claim is trimmed of
/// surrounding whitespace and trailing periods; claims of five bytes or fewer
/// are dropped as noise.
fn split_into_claims(text: &str) -> Vec<String> {
    let mut claims = Vec::new();
    let mut keep = |segment: &str| {
        let claim = segment.trim().trim_end_matches('.').trim_end();
        if claim.len() > 5 {
            claims.push(claim.to_string());
        }
    };

    let mut start = 0;
    let mut chars = text.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        let ends_claim = match ch {
            '\n' => true,
            // Only a period at a word boundary terminates a sentence.
            '.' => chars.peek().map_or(true, |&(_, next)| next.is_whitespace()),
            _ => false,
        };
        if ends_claim {
            keep(&text[start..idx]);
            start = idx + ch.len_utf8();
        }
    }
    keep(&text[start..]);

    claims
}
271
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Builds a small graph with one Python function (`process_payment`) and
    /// one Rust type (`CodeGraph`).
    fn test_graph() -> CodeGraph {
        let payment_fn = CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "process_payment".to_string(),
            "payments.stripe.process_payment".to_string(),
            PathBuf::from("src/payments/stripe.py"),
            Span::new(10, 0, 30, 0),
        );
        let graph_type = CodeUnit::new(
            CodeUnitType::Type,
            Language::Rust,
            "CodeGraph".to_string(),
            "crate::graph::CodeGraph".to_string(),
            PathBuf::from("src/graph/code_graph.rs"),
            Span::new(17, 0, 250, 0),
        );

        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(payment_fn);
        graph.add_unit(graph_type);
        graph
    }

    /// A reference to an unknown symbol is reported first, as `NonExistent`.
    #[test]
    fn detect_nonexistent_hallucination() {
        let graph = test_graph();
        let detector = HallucinationDetector::new(&graph);
        let check = detector.check_output("The send_invoice function handles billing");

        let first_type = check.hallucinations.first().map(|h| h.hallucination_type);
        assert_eq!(first_type, Some(HallucinationType::NonExistent));
    }

    /// Output that only mentions a known symbol is considered safe.
    #[test]
    fn verified_output_is_safe() {
        let graph = test_graph();
        let detector = HallucinationDetector::new(&graph);
        let check = detector.check_output("The process_payment function exists in the codebase");

        assert!(check.safe_to_use);
    }

    /// A misspelled symbol yields at least one fix suggestion.
    #[test]
    fn suggest_fixes_for_nonexistent() {
        let graph = test_graph();
        let detector = HallucinationDetector::new(&graph);
        let check = detector.check_output("The process_paymnt function works");

        let fixes = detector.suggest_fixes(&check);
        assert!(!fixes.is_empty());
    }

    /// The score stays within `[0, 1]` even when nothing is checked.
    #[test]
    fn hallucination_score_range() {
        let graph = test_graph();
        let detector = HallucinationDetector::new(&graph);
        let check = detector.check_output("Normal text without code references");

        assert!((0.0..=1.0).contains(&check.hallucination_score));
    }

    /// Periods and newlines both separate claims.
    #[test]
    fn split_claims_works() {
        let claim_count = split_into_claims("First claim. Second claim.\nThird claim").len();
        assert_eq!(claim_count, 3);
    }
}