Skip to main content

agentic_codebase/grounding/
hallucination.rs

1//! Hallucination Detector — Invention 5.
2//!
3//! Automatically detect when AI output contradicts the actual codebase.
4//! Builds on the Citation Engine to classify ungrounded claims by type
5//! and severity.
6
7use serde::{Deserialize, Serialize};
8
9use crate::graph::CodeGraph;
10
11use super::citation::{Citation, CitationEngine, GroundedClaim};
12use super::engine::extract_code_references;
13
14// ── Types ────────────────────────────────────────────────────────────────────
15
16/// Result of checking AI output for hallucinations.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct HallucinationCheck {
19    /// The AI output that was checked.
20    pub ai_output: String,
21    /// Detected hallucinations.
22    pub hallucinations: Vec<Hallucination>,
23    /// Claims that were verified.
24    pub verified_claims: Vec<GroundedClaim>,
25    /// Overall hallucination score (0 = none, 1 = all hallucinated).
26    pub hallucination_score: f64,
27    /// Is this output safe to use?
28    pub safe_to_use: bool,
29}
30
31/// A single detected hallucination.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct Hallucination {
34    /// The hallucinated claim.
35    pub claim: String,
36    /// Type of hallucination.
37    pub hallucination_type: HallucinationType,
38    /// What's actually true.
39    pub reality: String,
40    /// Evidence for reality.
41    pub evidence: Vec<Citation>,
42    /// Severity.
43    pub severity: HallucinationSeverity,
44}
45
46/// Type of hallucination detected.
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48pub enum HallucinationType {
49    /// Function/class doesn't exist.
50    NonExistent,
51    /// Exists but does something different.
52    WrongBehavior,
53    /// Wrong signature (params, return type).
54    WrongSignature,
55    /// Wrong location (different file/module).
56    WrongLocation,
57    /// Was true, no longer.
58    Outdated,
59    /// Invented feature.
60    InventedFeature,
61}
62
63/// Severity of a hallucination.
64#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
65pub enum HallucinationSeverity {
66    /// Minor inaccuracy.
67    Minor,
68    /// Would cause confusion.
69    Moderate,
70    /// Would cause errors.
71    Severe,
72    /// Would cause security/data issues.
73    Critical,
74}
75
76// ── HallucinationDetector ────────────────────────────────────────────────────
77
78/// Detector that finds hallucinations in AI output about code.
79pub struct HallucinationDetector<'g> {
80    citation_engine: CitationEngine<'g>,
81    graph: &'g CodeGraph,
82}
83
84impl<'g> HallucinationDetector<'g> {
85    /// Create a new detector backed by the given code graph.
86    pub fn new(graph: &'g CodeGraph) -> Self {
87        Self {
88            citation_engine: CitationEngine::new(graph),
89            graph,
90        }
91    }
92
93    /// Check AI output for hallucinations.
94    pub fn check_output(&self, ai_output: &str) -> HallucinationCheck {
95        let sentences = split_into_claims(ai_output);
96        let mut hallucinations = Vec::new();
97        let mut verified_claims = Vec::new();
98        let mut total_claims = 0usize;
99
100        for sentence in &sentences {
101            let refs = extract_code_references(sentence);
102            if refs.is_empty() {
103                continue; // Skip non-code sentences
104            }
105            total_claims += 1;
106
107            let grounded = self.citation_engine.ground_claim(sentence);
108            if grounded.fully_grounded {
109                verified_claims.push(grounded);
110            } else {
111                // Classify the hallucination
112                for reference in &refs {
113                    if let Some(hallucination) = self.classify_hallucination(sentence, reference) {
114                        hallucinations.push(hallucination);
115                    }
116                }
117            }
118        }
119
120        let hallucination_score = if total_claims == 0 {
121            0.0
122        } else {
123            hallucinations.len() as f64 / total_claims as f64
124        };
125
126        HallucinationCheck {
127            ai_output: ai_output.to_string(),
128            hallucinations,
129            verified_claims,
130            hallucination_score: hallucination_score.min(1.0),
131            safe_to_use: hallucination_score < 0.3,
132        }
133    }
134
135    /// Suggest fixes for detected hallucinations.
136    pub fn suggest_fixes(&self, check: &HallucinationCheck) -> Vec<String> {
137        let mut fixes = Vec::new();
138
139        for h in &check.hallucinations {
140            match h.hallucination_type {
141                HallucinationType::NonExistent => {
142                    // Try to find similar symbols
143                    let refs = extract_code_references(&h.claim);
144                    for r in &refs {
145                        let similar = self.find_similar_names(r);
146                        if !similar.is_empty() {
147                            fixes.push(format!(
148                                "Replace '{}' with one of: {}",
149                                r,
150                                similar.join(", ")
151                            ));
152                        } else {
153                            fixes.push(format!("Remove reference to non-existent '{}'", r));
154                        }
155                    }
156                }
157                HallucinationType::WrongLocation => {
158                    if !h.evidence.is_empty() {
159                        fixes.push(format!(
160                            "Correct location: actually in {}",
161                            h.evidence[0].location.file
162                        ));
163                    }
164                }
165                HallucinationType::WrongSignature => {
166                    if !h.evidence.is_empty() {
167                        fixes.push(format!("Correct signature: {}", h.evidence[0].code_snippet));
168                    }
169                }
170                _ => {
171                    fixes.push(format!("Review claim: {}", h.claim));
172                }
173            }
174        }
175
176        fixes
177    }
178
179    // ── Internal ─────────────────────────────────────────────────────────
180
181    fn classify_hallucination(&self, sentence: &str, reference: &str) -> Option<Hallucination> {
182        // Check if the reference exists at all
183        let mut found = false;
184        let mut found_unit = None;
185        for unit in self.graph.units() {
186            if unit.name == reference {
187                found = true;
188                found_unit = Some(unit);
189                break;
190            }
191        }
192
193        if !found {
194            // Check case-insensitive
195            let lower = reference.to_lowercase();
196            for unit in self.graph.units() {
197                if unit.name.to_lowercase() == lower {
198                    found = true;
199                    found_unit = Some(unit);
200                    break;
201                }
202            }
203        }
204
205        if !found {
206            // Symbol doesn't exist at all
207            return Some(Hallucination {
208                claim: sentence.to_string(),
209                hallucination_type: HallucinationType::NonExistent,
210                reality: format!("No symbol '{}' exists in the codebase", reference),
211                evidence: Vec::new(),
212                severity: HallucinationSeverity::Severe,
213            });
214        }
215
216        // Symbol exists — check if the claim about it is wrong
217        if let Some(unit) = found_unit {
218            let sentence_lower = sentence.to_lowercase();
219
220            // Check for wrong location claims
221            if sentence_lower.contains("in ") || sentence_lower.contains("file") {
222                let file_str = unit.file_path.display().to_string();
223                // If the sentence mentions a file path that doesn't match
224                let words: Vec<&str> = sentence.split_whitespace().collect();
225                for word in &words {
226                    let w = word.trim_matches(|c: char| {
227                        !c.is_alphanumeric() && c != '/' && c != '.' && c != '_'
228                    });
229                    if w.contains('/') && w.contains('.') && !file_str.contains(w) {
230                        let citation = self.citation_engine.cite_node(unit.id);
231                        return Some(Hallucination {
232                            claim: sentence.to_string(),
233                            hallucination_type: HallucinationType::WrongLocation,
234                            reality: format!("'{}' is in {}", reference, file_str),
235                            evidence: citation.into_iter().collect(),
236                            severity: HallucinationSeverity::Moderate,
237                        });
238                    }
239                }
240            }
241        }
242
243        None
244    }
245
246    fn find_similar_names(&self, name: &str) -> Vec<String> {
247        let lower = name.to_lowercase();
248        let mut results: Vec<(String, usize)> = Vec::new();
249
250        for unit in self.graph.units() {
251            let u_lower = unit.name.to_lowercase();
252            if (u_lower.starts_with(&lower) || lower.starts_with(&u_lower))
253                && !results.iter().any(|(n, _)| *n == unit.name)
254            {
255                results.push((unit.name.clone(), 0));
256            }
257        }
258
259        results.sort_by_key(|(_, d)| *d);
260        results.into_iter().take(5).map(|(n, _)| n).collect()
261    }
262}
263
/// Split text into individual claim sentences.
///
/// A claim ends at a newline, or at a period that is followed by whitespace
/// or by the end of the text. Periods inside a token — file names
/// (`stripe.py`), qualified names (`payments.stripe`), version numbers
/// (`1.2.3`) — do not end a claim, so such tokens reach the caller intact.
///
/// Each claim is trimmed of surrounding whitespace and trailing periods.
/// Fragments of five bytes or fewer are dropped as too short to be a claim.
fn split_into_claims(text: &str) -> Vec<String> {
    const MIN_CLAIM_LEN: usize = 5;

    let mut claims: Vec<String> = Vec::new();
    let mut keep = |segment: &str| {
        // Trailing periods are left over from ellipses ("..."), not content.
        let claim = segment.trim().trim_end_matches('.').trim_end();
        if claim.len() > MIN_CLAIM_LEN {
            claims.push(claim.to_string());
        }
    };

    let mut segment_start = 0usize;
    let mut chars = text.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        let ends_claim = match ch {
            '\n' => true,
            // Only a period at a word boundary terminates a sentence.
            '.' => chars.peek().map_or(true, |&(_, next)| next.is_whitespace()),
            _ => false,
        };
        if ends_claim {
            keep(&text[segment_start..idx]);
            segment_start = idx + ch.len_utf8();
        }
    }
    // Whatever follows the last terminator is a claim too.
    keep(&text[segment_start..]);

    claims
}
271
272// ── Tests ────────────────────────────────────────────────────────────────────
273
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Graph fixture with two units: the Python function `process_payment`
    /// and the Rust type `CodeGraph`.
    fn test_graph() -> CodeGraph {
        let payment_fn = CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "process_payment".to_string(),
            "payments.stripe.process_payment".to_string(),
            PathBuf::from("src/payments/stripe.py"),
            Span::new(10, 0, 30, 0),
        );
        let graph_type = CodeUnit::new(
            CodeUnitType::Type,
            Language::Rust,
            "CodeGraph".to_string(),
            "crate::graph::CodeGraph".to_string(),
            PathBuf::from("src/graph/code_graph.rs"),
            Span::new(17, 0, 250, 0),
        );

        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(payment_fn);
        graph.add_unit(graph_type);
        graph
    }

    /// A symbol missing from the graph is reported as `NonExistent`.
    #[test]
    fn detect_nonexistent_hallucination() {
        let graph = test_graph();
        let result = HallucinationDetector::new(&graph)
            .check_output("The send_invoice function handles billing");

        assert!(!result.hallucinations.is_empty());
        let first_kind = result.hallucinations[0].hallucination_type;
        assert_eq!(first_kind, HallucinationType::NonExistent);
    }

    /// Output that only mentions a known symbol is considered safe.
    #[test]
    fn verified_output_is_safe() {
        let graph = test_graph();
        let result = HallucinationDetector::new(&graph)
            .check_output("The process_payment function exists in the codebase");

        assert!(result.safe_to_use);
    }

    /// A misspelled symbol yields at least one suggested fix.
    #[test]
    fn suggest_fixes_for_nonexistent() {
        let graph = test_graph();
        let detector = HallucinationDetector::new(&graph);

        let result = detector.check_output("The process_paymnt function works");
        let suggestions = detector.suggest_fixes(&result);

        assert!(!suggestions.is_empty());
    }

    /// The score stays within `[0, 1]` for text without code references.
    #[test]
    fn hallucination_score_range() {
        let graph = test_graph();
        let result =
            HallucinationDetector::new(&graph).check_output("Normal text without code references");

        assert!((0.0..=1.0).contains(&result.hallucination_score));
    }

    /// Periods and newlines both separate claims.
    #[test]
    fn split_claims_works() {
        let parts = split_into_claims("First claim. Second claim.\nThird claim");
        assert_eq!(parts.len(), 3);
    }
}