Skip to main content

agentic_codebase/grounding/
citation.rs

1//! Citation Engine — Invention 4.
2//!
3//! Every claim about code MUST be backed by a citation to the actual graph node.
4//! Transforms grounding from binary (exists / doesn't exist) into rich evidence
5//! with source locations, code snippets, and citation strength.
6
7use serde::{Deserialize, Serialize};
8
9use crate::graph::CodeGraph;
10use crate::types::CodeUnit;
11
12use super::engine::extract_code_references;
13
14// ── Types ────────────────────────────────────────────────────────────────────
15
16/// A grounded claim about code with full citations.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct GroundedClaim {
19    /// The claim being made.
20    pub claim: String,
21    /// Citations proving the claim.
22    pub citations: Vec<Citation>,
23    /// Confidence based on citation strength.
24    pub confidence: f64,
25    /// Is this claim fully grounded?
26    pub fully_grounded: bool,
27}
28
29/// A citation to a specific code node.
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct Citation {
32    /// The node being cited.
33    pub node_id: u64,
34    /// Specific location in source.
35    pub location: CodeLocation,
36    /// The actual code being cited (signature or name).
37    pub code_snippet: String,
38    /// How this supports the claim.
39    pub relevance: String,
40    /// Strength of evidence.
41    pub strength: CitationStrength,
42}
43
44/// Precise location in source code.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct CodeLocation {
47    pub file: String,
48    pub start_line: u32,
49    pub end_line: u32,
50    pub start_col: u32,
51    pub end_col: u32,
52}
53
54/// Strength of a citation.
55#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
56pub enum CitationStrength {
57    /// Directly proves the claim.
58    Direct,
59    /// Strongly supports the claim.
60    Strong,
61    /// Partially supports.
62    Partial,
63    /// Weak/circumstantial.
64    Weak,
65}
66
67/// A claim that couldn't be grounded.
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct UngroundedClaim {
70    /// The claim attempted.
71    pub claim: String,
72    /// Why it couldn't be grounded.
73    pub reason: UngroundedReason,
74    /// What would be needed to ground it.
75    pub requirements: Vec<String>,
76}
77
78/// Why a claim couldn't be grounded.
79#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
80pub enum UngroundedReason {
81    /// No matching code found.
82    NotFound,
83    /// Code found but doesn't support claim.
84    Contradicted,
85    /// Ambiguous (multiple interpretations).
86    Ambiguous,
87    /// Outside indexed scope.
88    OutOfScope,
89}
90
91// ── CitationEngine ───────────────────────────────────────────────────────────
92
93/// Engine that produces rich citations for code claims.
94pub struct CitationEngine<'g> {
95    graph: &'g CodeGraph,
96}
97
98impl<'g> CitationEngine<'g> {
99    /// Create a new citation engine backed by the given code graph.
100    pub fn new(graph: &'g CodeGraph) -> Self {
101        Self { graph }
102    }
103
104    /// Ground a natural-language claim with full citations.
105    pub fn ground_claim(&self, claim: &str) -> GroundedClaim {
106        let refs = extract_code_references(claim);
107
108        if refs.is_empty() {
109            return GroundedClaim {
110                claim: claim.to_string(),
111                citations: Vec::new(),
112                confidence: 0.0,
113                fully_grounded: false,
114            };
115        }
116
117        let mut citations = Vec::new();
118        let mut matched = 0usize;
119
120        for reference in &refs {
121            let found = self.find_citations(reference);
122            if !found.is_empty() {
123                matched += 1;
124                citations.extend(found);
125            }
126        }
127
128        let confidence = if refs.is_empty() {
129            0.0
130        } else {
131            matched as f64 / refs.len() as f64
132        };
133
134        GroundedClaim {
135            claim: claim.to_string(),
136            fully_grounded: matched == refs.len(),
137            confidence,
138            citations,
139        }
140    }
141
142    /// Build a citation for a specific node by ID.
143    pub fn cite_node(&self, unit_id: u64) -> Option<Citation> {
144        let unit = self.graph.get_unit(unit_id)?;
145        Some(self.citation_from_unit(unit, "direct reference", CitationStrength::Direct))
146    }
147
148    /// Verify if a specific claim is true (simpler API).
149    pub fn verify_claim(&self, claim: &str) -> bool {
150        let grounded = self.ground_claim(claim);
151        grounded.fully_grounded
152    }
153
154    /// Find claims in text that contradict the codebase.
155    pub fn find_contradictions(&self, claim: &str) -> Vec<UngroundedClaim> {
156        let refs = extract_code_references(claim);
157        let mut contradictions = Vec::new();
158
159        for reference in &refs {
160            // Check if reference exists at all
161            let exact = self.find_exact(reference);
162            if exact.is_empty() {
163                // Check if there's something similar (possible wrong name)
164                let similar = self.find_similar(reference);
165                let reason = if similar.is_empty() {
166                    UngroundedReason::NotFound
167                } else {
168                    UngroundedReason::Contradicted
169                };
170
171                let mut requirements = Vec::new();
172                if !similar.is_empty() {
173                    requirements.push(format!(
174                        "Did you mean: {}?",
175                        similar
176                            .iter()
177                            .map(|u| u.name.as_str())
178                            .collect::<Vec<_>>()
179                            .join(", ")
180                    ));
181                } else {
182                    requirements.push(format!("No symbol '{}' found in codebase", reference));
183                }
184
185                contradictions.push(UngroundedClaim {
186                    claim: format!("Reference to '{}'", reference),
187                    reason,
188                    requirements,
189                });
190            }
191        }
192
193        contradictions
194    }
195
196    // ── Internal helpers ─────────────────────────────────────────────────
197
198    fn find_citations(&self, name: &str) -> Vec<Citation> {
199        let mut results = Vec::new();
200
201        // 1. Exact match — Direct strength
202        for unit in self.graph.units() {
203            if unit.name == name {
204                results.push(self.citation_from_unit(
205                    unit,
206                    "exact name match",
207                    CitationStrength::Direct,
208                ));
209            }
210        }
211        if !results.is_empty() {
212            return results;
213        }
214
215        // 2. Qualified name contains — Strong strength
216        for unit in self.graph.units() {
217            if unit.qualified_name.contains(name) {
218                results.push(self.citation_from_unit(
219                    unit,
220                    "qualified name match",
221                    CitationStrength::Strong,
222                ));
223            }
224        }
225        if !results.is_empty() {
226            return results;
227        }
228
229        // 3. Case-insensitive — Partial strength
230        let lower = name.to_lowercase();
231        for unit in self.graph.units() {
232            if unit.name.to_lowercase() == lower {
233                results.push(self.citation_from_unit(
234                    unit,
235                    "case-insensitive match",
236                    CitationStrength::Partial,
237                ));
238            }
239        }
240
241        results
242    }
243
244    fn find_exact(&self, name: &str) -> Vec<&CodeUnit> {
245        self.graph
246            .units()
247            .iter()
248            .filter(|u| u.name == name)
249            .collect()
250    }
251
252    fn find_similar(&self, name: &str) -> Vec<&CodeUnit> {
253        let lower = name.to_lowercase();
254        self.graph
255            .units()
256            .iter()
257            .filter(|u| {
258                let u_lower = u.name.to_lowercase();
259                u_lower.starts_with(&lower)
260                    || lower.starts_with(&u_lower)
261                    || levenshtein_distance(&lower, &u_lower) <= name.len() / 3
262            })
263            .collect()
264    }
265
266    fn citation_from_unit(
267        &self,
268        unit: &CodeUnit,
269        relevance: &str,
270        strength: CitationStrength,
271    ) -> Citation {
272        Citation {
273            node_id: unit.id,
274            location: CodeLocation {
275                file: unit.file_path.display().to_string(),
276                start_line: unit.span.start_line,
277                end_line: unit.span.end_line,
278                start_col: unit.span.start_col,
279                end_col: unit.span.end_col,
280            },
281            code_snippet: unit.signature.clone().unwrap_or_else(|| unit.name.clone()),
282            relevance: relevance.to_string(),
283            strength,
284        }
285    }
286}
287
/// Edit distance between `a` and `b`, counted in Unicode scalar values.
///
/// Insertions, deletions and substitutions each cost one. Only a single
/// rolling row of the dynamic-programming table is kept.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // row[j] holds the distance between the processed prefix of `source`
    // and the first `j` characters of `target`.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, &src_char) in source.iter().enumerate() {
        // Value of the cell diagonally up-left of the one being filled.
        let mut diagonal = row[0];
        row[0] = i + 1;

        for (j, &dst_char) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitute = diagonal + usize::from(src_char != dst_char);
            let delete = above + 1;
            let insert = row[j] + 1;
            row[j + 1] = delete.min(insert).min(substitute);
            diagonal = above;
        }
    }

    row[target.len()]
}
312
313// ── Tests ────────────────────────────────────────────────────────────────────
314
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Fixture graph with two units: a Python function `process_payment`
    /// and a Rust type `CodeGraph`, added in that order.
    fn test_graph() -> CodeGraph {
        let payment_fn = CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "process_payment".to_string(),
            "payments.stripe.process_payment".to_string(),
            PathBuf::from("src/payments/stripe.py"),
            Span::new(10, 0, 30, 0),
        );
        let graph_type = CodeUnit::new(
            CodeUnitType::Type,
            Language::Rust,
            "CodeGraph".to_string(),
            "crate::graph::CodeGraph".to_string(),
            PathBuf::from("src/graph/code_graph.rs"),
            Span::new(17, 0, 250, 0),
        );

        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(payment_fn);
        graph.add_unit(graph_type);
        graph
    }

    /// A claim naming an existing symbol is fully grounded with direct evidence.
    #[test]
    fn ground_claim_verified() {
        let graph = test_graph();
        let grounded =
            CitationEngine::new(&graph).ground_claim("The process_payment function exists");

        assert!(grounded.fully_grounded);
        assert!(!grounded.citations.is_empty());
        assert_eq!(grounded.citations[0].strength, CitationStrength::Direct);
    }

    /// A claim naming an unknown symbol is neither fully grounded nor fully confident.
    #[test]
    fn ground_claim_ungrounded() {
        let graph = test_graph();
        let grounded =
            CitationEngine::new(&graph).ground_claim("The send_invoice function sends emails");

        assert!(!grounded.fully_grounded);
        assert!(grounded.confidence < 1.0);
    }

    /// Citing a node by ID yields a direct citation.
    #[test]
    fn cite_node_returns_citation() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        // unit IDs are assigned sequentially starting from 0
        let cited = engine.cite_node(0);
        assert!(cited.is_some());
        assert_eq!(
            cited.map(|citation| citation.strength),
            Some(CitationStrength::Direct)
        );
    }

    /// A reference with no counterpart in the graph is reported as not found.
    #[test]
    fn find_contradictions_detects_missing() {
        let graph = test_graph();
        let reports = CitationEngine::new(&graph)
            .find_contradictions("The nonexistent_function does things");

        assert!(!reports.is_empty());
        assert_eq!(reports[0].reason, UngroundedReason::NotFound);
    }

    /// `verify_claim` accepts a claim about an existing symbol.
    #[test]
    fn verify_claim_true() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        assert!(engine.verify_claim("process_payment exists in the codebase"));
    }

    /// `verify_claim` rejects a claim about a missing symbol.
    #[test]
    fn verify_claim_false() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        assert!(!engine.verify_claim("The missing_func handles errors"));
    }
}