agentic_codebase/grounding/
citation.rs1use serde::{Deserialize, Serialize};
8
9use crate::graph::CodeGraph;
10use crate::types::CodeUnit;
11
12use super::engine::extract_code_references;
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct GroundedClaim {
19 pub claim: String,
21 pub citations: Vec<Citation>,
23 pub confidence: f64,
25 pub fully_grounded: bool,
27}
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct Citation {
32 pub node_id: u64,
34 pub location: CodeLocation,
36 pub code_snippet: String,
38 pub relevance: String,
40 pub strength: CitationStrength,
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct CodeLocation {
47 pub file: String,
48 pub start_line: u32,
49 pub end_line: u32,
50 pub start_col: u32,
51 pub end_col: u32,
52}
53
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
56pub enum CitationStrength {
57 Direct,
59 Strong,
61 Partial,
63 Weak,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
69pub struct UngroundedClaim {
70 pub claim: String,
72 pub reason: UngroundedReason,
74 pub requirements: Vec<String>,
76}
77
78#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
80pub enum UngroundedReason {
81 NotFound,
83 Contradicted,
85 Ambiguous,
87 OutOfScope,
89}
90
91pub struct CitationEngine<'g> {
95 graph: &'g CodeGraph,
96}
97
98impl<'g> CitationEngine<'g> {
99 pub fn new(graph: &'g CodeGraph) -> Self {
101 Self { graph }
102 }
103
104 pub fn ground_claim(&self, claim: &str) -> GroundedClaim {
106 let refs = extract_code_references(claim);
107
108 if refs.is_empty() {
109 return GroundedClaim {
110 claim: claim.to_string(),
111 citations: Vec::new(),
112 confidence: 0.0,
113 fully_grounded: false,
114 };
115 }
116
117 let mut citations = Vec::new();
118 let mut matched = 0usize;
119
120 for reference in &refs {
121 let found = self.find_citations(reference);
122 if !found.is_empty() {
123 matched += 1;
124 citations.extend(found);
125 }
126 }
127
128 let confidence = if refs.is_empty() {
129 0.0
130 } else {
131 matched as f64 / refs.len() as f64
132 };
133
134 GroundedClaim {
135 claim: claim.to_string(),
136 fully_grounded: matched == refs.len(),
137 confidence,
138 citations,
139 }
140 }
141
142 pub fn cite_node(&self, unit_id: u64) -> Option<Citation> {
144 let unit = self.graph.get_unit(unit_id)?;
145 Some(self.citation_from_unit(unit, "direct reference", CitationStrength::Direct))
146 }
147
148 pub fn verify_claim(&self, claim: &str) -> bool {
150 let grounded = self.ground_claim(claim);
151 grounded.fully_grounded
152 }
153
154 pub fn find_contradictions(&self, claim: &str) -> Vec<UngroundedClaim> {
156 let refs = extract_code_references(claim);
157 let mut contradictions = Vec::new();
158
159 for reference in &refs {
160 let exact = self.find_exact(reference);
162 if exact.is_empty() {
163 let similar = self.find_similar(reference);
165 let reason = if similar.is_empty() {
166 UngroundedReason::NotFound
167 } else {
168 UngroundedReason::Contradicted
169 };
170
171 let mut requirements = Vec::new();
172 if !similar.is_empty() {
173 requirements.push(format!(
174 "Did you mean: {}?",
175 similar
176 .iter()
177 .map(|u| u.name.as_str())
178 .collect::<Vec<_>>()
179 .join(", ")
180 ));
181 } else {
182 requirements.push(format!("No symbol '{}' found in codebase", reference));
183 }
184
185 contradictions.push(UngroundedClaim {
186 claim: format!("Reference to '{}'", reference),
187 reason,
188 requirements,
189 });
190 }
191 }
192
193 contradictions
194 }
195
196 fn find_citations(&self, name: &str) -> Vec<Citation> {
199 let mut results = Vec::new();
200
201 for unit in self.graph.units() {
203 if unit.name == name {
204 results.push(self.citation_from_unit(
205 unit,
206 "exact name match",
207 CitationStrength::Direct,
208 ));
209 }
210 }
211 if !results.is_empty() {
212 return results;
213 }
214
215 for unit in self.graph.units() {
217 if unit.qualified_name.contains(name) {
218 results.push(self.citation_from_unit(
219 unit,
220 "qualified name match",
221 CitationStrength::Strong,
222 ));
223 }
224 }
225 if !results.is_empty() {
226 return results;
227 }
228
229 let lower = name.to_lowercase();
231 for unit in self.graph.units() {
232 if unit.name.to_lowercase() == lower {
233 results.push(self.citation_from_unit(
234 unit,
235 "case-insensitive match",
236 CitationStrength::Partial,
237 ));
238 }
239 }
240
241 results
242 }
243
244 fn find_exact(&self, name: &str) -> Vec<&CodeUnit> {
245 self.graph
246 .units()
247 .iter()
248 .filter(|u| u.name == name)
249 .collect()
250 }
251
252 fn find_similar(&self, name: &str) -> Vec<&CodeUnit> {
253 let lower = name.to_lowercase();
254 self.graph
255 .units()
256 .iter()
257 .filter(|u| {
258 let u_lower = u.name.to_lowercase();
259 u_lower.starts_with(&lower)
260 || lower.starts_with(&u_lower)
261 || levenshtein_distance(&lower, &u_lower) <= name.len() / 3
262 })
263 .collect()
264 }
265
266 fn citation_from_unit(
267 &self,
268 unit: &CodeUnit,
269 relevance: &str,
270 strength: CitationStrength,
271 ) -> Citation {
272 Citation {
273 node_id: unit.id,
274 location: CodeLocation {
275 file: unit.file_path.display().to_string(),
276 start_line: unit.span.start_line,
277 end_line: unit.span.end_line,
278 start_col: unit.span.start_col,
279 end_col: unit.span.end_col,
280 },
281 code_snippet: unit.signature.clone().unwrap_or_else(|| unit.name.clone()),
282 relevance: relevance.to_string(),
283 strength,
284 }
285 }
286}
287
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn one string into the other,
/// counted in `char`s rather than bytes.
fn levenshtein_distance(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // Against an empty string the distance is the other string's length.
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // `row[j]` is the distance between the processed prefix of `source` and
    // the first `j` characters of `target`.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, &src_ch) in source.iter().enumerate() {
        // Value of the previous row one column to the left of the current cell.
        let mut diagonal = row[0];
        row[0] = i + 1;

        for (j, &tgt_ch) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(src_ch != tgt_ch);
            let deletion = above + 1;
            let insertion = row[j] + 1;

            row[j + 1] = deletion.min(insertion).min(substitution);
            diagonal = above;
        }
    }

    row[target.len()]
}
312
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Builds a two-unit graph: a Python function and a Rust type.
    fn test_graph() -> CodeGraph {
        let python_function = CodeUnit::new(
            CodeUnitType::Function,
            Language::Python,
            "process_payment".to_string(),
            "payments.stripe.process_payment".to_string(),
            PathBuf::from("src/payments/stripe.py"),
            Span::new(10, 0, 30, 0),
        );
        let rust_type = CodeUnit::new(
            CodeUnitType::Type,
            Language::Rust,
            "CodeGraph".to_string(),
            "crate::graph::CodeGraph".to_string(),
            PathBuf::from("src/graph/code_graph.rs"),
            Span::new(17, 0, 250, 0),
        );

        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(python_function);
        graph.add_unit(rust_type);
        graph
    }

    /// A claim naming an existing symbol is fully grounded with a direct citation.
    #[test]
    fn ground_claim_verified() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        let grounded = engine.ground_claim("The process_payment function exists");

        assert!(grounded.fully_grounded);
        assert!(!grounded.citations.is_empty());
        assert_eq!(grounded.citations[0].strength, CitationStrength::Direct);
    }

    /// A claim naming an unknown symbol is not fully grounded.
    #[test]
    fn ground_claim_ungrounded() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        let grounded = engine.ground_claim("The send_invoice function sends emails");

        assert!(!grounded.fully_grounded);
        assert!(grounded.confidence < 1.0);
    }

    /// Looking a unit up by id yields a direct citation.
    #[test]
    fn cite_node_returns_citation() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        // Assumes the first unit added to the graph receives id 0 — TODO confirm.
        let maybe_citation = engine.cite_node(0);

        assert!(maybe_citation.is_some());
        let citation = maybe_citation.unwrap();
        assert_eq!(citation.strength, CitationStrength::Direct);
    }

    /// A reference with no exact or similar unit is reported as not found.
    #[test]
    fn find_contradictions_detects_missing() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        let reported = engine.find_contradictions("The nonexistent_function does things");

        assert!(!reported.is_empty());
        assert_eq!(reported[0].reason, UngroundedReason::NotFound);
    }

    /// Verification succeeds when the referenced symbol exists.
    #[test]
    fn verify_claim_true() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        assert!(engine.verify_claim("process_payment exists in the codebase"));
    }

    /// Verification fails when the referenced symbol is missing.
    #[test]
    fn verify_claim_false() {
        let graph = test_graph();
        let engine = CitationEngine::new(&graph);

        assert!(!engine.verify_claim("The missing_func handles errors"));
    }
}