1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12
13use anyhow::{Context, Result};
14use serde::{Deserialize, Serialize};
15
16use crate::extract;
17use crate::lang::LanguageRegistry;
18
/// How a single symbol differs between the old and the new ref.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ChangeKind {
    /// The symbol is present in the new ref only.
    Added,
    /// The symbol is present in the old ref only.
    Removed,
    /// The symbol exists on both sides under the same file, name and kind, but its
    /// signature hash differs.
    SignatureChanged,
    /// NOTE(review): presumably a body-only change; the diffing code visible in this
    /// file never emits this variant — confirm where it is produced.
    Modified,
    /// The symbol was found under the same name and kind in a different file of the old
    /// ref; `from_file` is that old file.
    Moved { from_file: String },
}
33
34impl std::fmt::Display for ChangeKind {
35 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36 match self {
37 ChangeKind::Added => write!(f, "ADDED"),
38 ChangeKind::Removed => write!(f, "REMOVED"),
39 ChangeKind::SignatureChanged => write!(f, "SIGNATURE_CHANGED"),
40 ChangeKind::Modified => write!(f, "MODIFIED"),
41 ChangeKind::Moved { from_file } => write!(f, "MOVED(from:{})", from_file),
42 }
43 }
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct SymbolChange {
49 pub name: String,
50 pub kind: String,
51 pub file: String,
52 pub change: ChangeKind,
53 pub caller_count: usize,
55}
56
/// Result of comparing the symbols of two refs.
#[derive(Debug, Default)]
pub struct SymbolDiff {
    /// The ref used as the "before" side, exactly as passed in by the caller.
    pub old_ref: String,
    /// The ref used as the "after" side, exactly as passed in by the caller.
    pub new_ref: String,
    /// All detected changes, ordered by change kind (removals first, additions last).
    pub changes: Vec<SymbolChange>,
}
64
65impl SymbolDiff {
66 pub fn added(&self) -> impl Iterator<Item = &SymbolChange> {
67 self.changes
68 .iter()
69 .filter(|c| c.change == ChangeKind::Added)
70 }
71 pub fn removed(&self) -> impl Iterator<Item = &SymbolChange> {
72 self.changes
73 .iter()
74 .filter(|c| c.change == ChangeKind::Removed)
75 }
76 pub fn modified(&self) -> impl Iterator<Item = &SymbolChange> {
77 self.changes.iter().filter(|c| {
78 matches!(
79 c.change,
80 ChangeKind::Modified | ChangeKind::SignatureChanged | ChangeKind::Moved { .. }
81 )
82 })
83 }
84}
85
/// Minimal, flattened view of one extracted symbol, used only for diffing.
#[derive(Clone)]
struct FlatSym {
    /// File path as reported by the extractor.
    file: String,
    /// Symbol name.
    name: String,
    /// Symbol kind as a string.
    kind: String,
    /// Copy of the extractor's `signature_hash`; the diff ignores a signature comparison
    /// when either side's hash is empty.
    sig_hash: String,
}
98
99pub fn semantic_diff(
104 project_root: &Path,
105 old_ref: &str,
106 new_ref: &str,
107 registry: &LanguageRegistry,
108) -> Result<SymbolDiff> {
109 let changed = compute_changed_files(project_root, old_ref, new_ref);
110
111 let (old_filter, new_filter) = match &changed {
112 Some(cf) => (Some(&cf.old_ref_files), Some(&cf.new_ref_files)),
113 None => (None, None),
114 };
115
116 let old_symbols = extract_ref_symbols(project_root, old_ref, registry, old_filter)
117 .with_context(|| format!("failed to extract symbols for ref '{}'", old_ref))?;
118 let new_symbols = extract_ref_symbols(project_root, new_ref, registry, new_filter)
119 .with_context(|| format!("failed to extract symbols for ref '{}'", new_ref))?;
120
121 Ok(diff_symbol_maps(old_ref, new_ref, old_symbols, new_symbols))
122}
123
/// Paths touched between two refs, split by the side on which they must be read.
struct ChangedFiles {
    /// Paths to read from the old ref: deleted files plus files changed in place.
    old_ref_files: HashSet<String>,
    /// Paths to read from the new ref: added files plus files changed in place.
    new_ref_files: HashSet<String>,
}
128
129fn compute_changed_files(
130 project_root: &Path,
131 old_ref: &str,
132 new_ref: &str,
133) -> Option<ChangedFiles> {
134 let output = std::process::Command::new("git")
135 .args(["diff", "--name-status", "--no-renames", old_ref, new_ref])
136 .current_dir(project_root)
137 .output()
138 .ok()?;
139
140 if !output.status.success() {
141 eprintln!(
142 "infigraph: git diff --name-status failed for {}..{}, falling back to full extraction",
143 old_ref, new_ref
144 );
145 return None;
146 }
147
148 let text = String::from_utf8_lossy(&output.stdout);
149 let mut old_ref_files = HashSet::new();
150 let mut new_ref_files = HashSet::new();
151
152 for line in text.lines() {
153 let line = line.trim();
154 if line.is_empty() {
155 continue;
156 }
157 let mut parts = line.splitn(2, '\t');
158 let status = parts.next().unwrap_or("").trim();
159 let path = match parts.next() {
160 Some(p) => p.trim().to_string(),
161 None => continue,
162 };
163
164 match status {
165 "A" => {
166 new_ref_files.insert(path);
167 }
168 "D" => {
169 old_ref_files.insert(path);
170 }
171 _ => {
172 old_ref_files.insert(path.clone());
173 new_ref_files.insert(path);
174 }
175 }
176 }
177
178 Some(ChangedFiles {
179 old_ref_files,
180 new_ref_files,
181 })
182}
183
/// Largest file filter that is still passed to `git archive` as explicit path arguments.
/// Larger filters make `extract_ref_symbols` archive the whole tree and apply the filter
/// afterwards while walking the extracted files.
/// NOTE(review): presumably chosen to stay below OS command-line length limits — confirm.
const MAX_ARCHIVE_ARGS: usize = 500;
191
192fn extract_ref_symbols(
193 project_root: &Path,
194 git_ref: &str,
195 registry: &LanguageRegistry,
196 file_filter: Option<&HashSet<String>>,
197) -> Result<HashMap<String, FlatSym>> {
198 if let Some(filter) = file_filter {
199 if filter.is_empty() {
200 return Ok(HashMap::new());
201 }
202 }
203
204 let is_working_tree = git_ref == "HEAD" || git_ref == "WORKING";
205
206 if is_working_tree {
207 return extract_dir_symbols(project_root, project_root, registry, file_filter);
208 }
209
210 let tmp = tempfile::tempdir().context("failed to create temp dir")?;
211
212 let use_filtered_archive = file_filter
213 .map(|f| f.len() <= MAX_ARCHIVE_ARGS)
214 .unwrap_or(false);
215
216 let archive_output = if use_filtered_archive {
217 let filter = file_filter.unwrap();
218 let mut args: Vec<&str> = vec!["archive", "--format=tar", git_ref, "--"];
219 args.extend(filter.iter().map(|s| s.as_str()));
220 std::process::Command::new("git")
221 .args(&args)
222 .current_dir(project_root)
223 .output()
224 .context("git archive (filtered) failed")?
225 } else {
226 std::process::Command::new("git")
227 .args(["archive", "--format=tar", git_ref])
228 .current_dir(project_root)
229 .output()
230 .context("git archive failed")?
231 };
232
233 if !archive_output.status.success() {
234 let err = String::from_utf8_lossy(&archive_output.stderr);
235 if use_filtered_archive {
236 eprintln!(
237 "infigraph: filtered git archive for {} failed, falling back to full archive: {}",
238 git_ref,
239 err.trim()
240 );
241 let full_output = std::process::Command::new("git")
242 .args(["archive", "--format=tar", git_ref])
243 .current_dir(project_root)
244 .output()
245 .context("git archive (full fallback) failed")?;
246 if !full_output.status.success() {
247 let err2 = String::from_utf8_lossy(&full_output.stderr);
248 anyhow::bail!("git archive {} failed: {}", git_ref, err2.trim());
249 }
250 return untar_and_extract(tmp.path(), &full_output.stdout, registry, file_filter);
251 }
252 anyhow::bail!("git archive {} failed: {}", git_ref, err.trim());
253 }
254
255 untar_and_extract(tmp.path(), &archive_output.stdout, registry, file_filter)
256}
257
258fn untar_and_extract(
259 tmp_dir: &Path,
260 tar_data: &[u8],
261 registry: &LanguageRegistry,
262 file_filter: Option<&HashSet<String>>,
263) -> Result<HashMap<String, FlatSym>> {
264 let mut tar = std::process::Command::new("tar")
265 .args(["-x", "-C", tmp_dir.to_str().unwrap_or(".")])
266 .stdin(std::process::Stdio::piped())
267 .spawn()
268 .context("failed to spawn tar")?;
269
270 if let Some(stdin) = tar.stdin.take() {
271 use std::io::Write;
272 let mut w = stdin;
273 w.write_all(tar_data)?;
274 }
275 tar.wait().context("tar wait failed")?;
276
277 extract_dir_symbols(tmp_dir, tmp_dir, registry, file_filter)
278}
279
280fn extract_dir_symbols(
281 root: &Path,
282 dir: &Path,
283 registry: &LanguageRegistry,
284 file_filter: Option<&HashSet<String>>,
285) -> Result<HashMap<String, FlatSym>> {
286 let mut map = HashMap::new();
287 collect_symbols(root, dir, registry, file_filter, &mut map)?;
288 Ok(map)
289}
290
/// Directory names that `collect_symbols` never descends into.
///
/// `collect_symbols` also skips every directory whose name starts with `.`, so the dotted
/// entries here are already covered by that rule.
static SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    ".venv",
    "venv",
    "target",
    "build",
    "dist",
    "__pycache__",
    ".tox",
    ".infigraph",
];
303
304fn collect_symbols(
305 root: &Path,
306 dir: &Path,
307 registry: &LanguageRegistry,
308 file_filter: Option<&HashSet<String>>,
309 map: &mut HashMap<String, FlatSym>,
310) -> Result<()> {
311 for entry in std::fs::read_dir(dir)? {
312 let entry = entry?;
313 let path = entry.path();
314 let name = entry.file_name();
315 let name_str = name.to_string_lossy();
316
317 if path.is_dir() {
318 if !SKIP_DIRS.contains(&name_str.as_ref()) && !name_str.starts_with('.') {
319 collect_symbols(root, &path, registry, file_filter, map)?;
320 }
321 } else if path.is_file() {
322 let rel = path
323 .strip_prefix(root)
324 .unwrap_or(&path)
325 .to_string_lossy()
326 .replace('\\', "/");
327 if let Some(filter) = file_filter {
328 if !filter.contains(&rel) {
329 continue;
330 }
331 }
332 let Ok(source) = std::fs::read(&path) else {
333 continue;
334 };
335 let Some(pack) = registry.for_file_with_content(&rel, &source) else {
336 continue;
337 };
338 let Ok(extraction) = extract::extract_file(&rel, &source, pack) else {
339 continue;
340 };
341 let file = extraction.file.clone();
342 for sym in &extraction.symbols {
343 let kind_str = sym.kind.as_str().to_string();
344 let key = format!("{}::{}::{}", file, sym.name, kind_str);
346 map.insert(
347 key,
348 FlatSym {
349 file: file.clone(),
350 name: sym.name.clone(),
351 kind: kind_str,
352 sig_hash: sym.signature_hash.clone(),
353 },
354 );
355 }
356 }
357 }
358 Ok(())
359}
360
361fn diff_symbol_maps(
366 old_ref: &str,
367 new_ref: &str,
368 old: HashMap<String, FlatSym>,
369 new: HashMap<String, FlatSym>,
370) -> SymbolDiff {
371 let mut changes = Vec::new();
372
373 let old_by_name: HashMap<String, &FlatSym> = old
375 .values()
376 .map(|s| (format!("{}::{}", s.name, s.kind), s))
377 .collect();
378
379 for (key, new_sym) in &new {
381 if let Some(old_sym) = old.get(key) {
382 if old_sym.sig_hash != new_sym.sig_hash
384 && !old_sym.sig_hash.is_empty()
385 && !new_sym.sig_hash.is_empty()
386 {
387 changes.push(SymbolChange {
388 name: new_sym.name.clone(),
389 kind: new_sym.kind.clone(),
390 file: new_sym.file.clone(),
391 change: ChangeKind::SignatureChanged,
392 caller_count: 0,
393 });
394 }
395 } else {
396 let name_key = format!("{}::{}", new_sym.name, new_sym.kind);
398 if let Some(old_sym) = old_by_name.get(&name_key) {
399 if old_sym.file != new_sym.file {
400 changes.push(SymbolChange {
401 name: new_sym.name.clone(),
402 kind: new_sym.kind.clone(),
403 file: new_sym.file.clone(),
404 change: ChangeKind::Moved {
405 from_file: old_sym.file.clone(),
406 },
407 caller_count: 0,
408 });
409 continue;
410 }
411 }
412 changes.push(SymbolChange {
414 name: new_sym.name.clone(),
415 kind: new_sym.kind.clone(),
416 file: new_sym.file.clone(),
417 change: ChangeKind::Added,
418 caller_count: 0,
419 });
420 }
421 }
422
423 let moved_names: std::collections::HashSet<String> = changes
425 .iter()
426 .filter_map(|c| {
427 if matches!(c.change, ChangeKind::Moved { .. }) {
428 Some(format!("{}::{}", c.name, c.kind))
429 } else {
430 None
431 }
432 })
433 .collect();
434
435 for (key, old_sym) in &old {
436 if !new.contains_key(key) {
437 let name_key = format!("{}::{}", old_sym.name, old_sym.kind);
438 if !moved_names.contains(&name_key) {
439 changes.push(SymbolChange {
440 name: old_sym.name.clone(),
441 kind: old_sym.kind.clone(),
442 file: old_sym.file.clone(),
443 change: ChangeKind::Removed,
444 caller_count: 0,
445 });
446 }
447 }
448 }
449
450 changes.sort_by_key(|c| match &c.change {
452 ChangeKind::Removed => 0,
453 ChangeKind::SignatureChanged => 1,
454 ChangeKind::Modified => 2,
455 ChangeKind::Moved { .. } => 3,
456 ChangeKind::Added => 4,
457 });
458
459 SymbolDiff {
460 old_ref: old_ref.to_string(),
461 new_ref: new_ref.to_string(),
462 changes,
463 }
464}
465
466pub fn format_diff(diff: &SymbolDiff) -> String {
471 if diff.changes.is_empty() {
472 return format!(
473 "No symbol-level changes between '{}' and '{}'.",
474 diff.old_ref, diff.new_ref
475 );
476 }
477
478 let added = diff.added().count();
479 let removed = diff.removed().count();
480 let modified = diff.modified().count();
481
482 let mut out = format!(
483 "Semantic diff {} → {} [+{} added -{} removed ~{} modified]\n\n",
484 diff.old_ref, diff.new_ref, added, removed, modified
485 );
486
487 let mut cur_file = String::new();
488 for c in &diff.changes {
489 if c.file != cur_file {
490 out.push_str(&format!(" {}\n", c.file));
491 cur_file = c.file.clone();
492 }
493 let callers = if c.caller_count > 0 {
494 format!(" [{} callers]", c.caller_count)
495 } else {
496 String::new()
497 };
498 out.push_str(&format!(
499 " {:>20} {:<10} {}{}\n",
500 c.change.to_string(),
501 c.kind,
502 c.name,
503 callers
504 ));
505 }
506
507 out
508}