1use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node};
9use std::collections::HashSet;
10use std::path::Path;
11
/// Metadata for a single commit as read from `git log` output.
///
/// Every field holds the raw text git printed for it; nothing is validated or
/// normalised beyond splitting the output into lines.
///
/// `PartialEq`/`Eq` are derived so parsed commits can be compared directly,
/// e.g. in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitInfo {
    /// Full commit hash.
    pub hash: String,
    /// Abbreviated commit hash.
    pub short_hash: String,
    /// Author name.
    pub author: String,
    /// Author date, kept as the string git printed.
    pub date: String,
    /// Subject line of the commit message.
    pub message: String,
    /// Paths of the files the commit touched, in the order git listed them.
    pub files_changed: Vec<String>,
}
25
26pub fn index_git_history(
27 graph: &CodeGraph,
28 project_root: &Path,
29 max_commits: usize,
30) -> anyhow::Result<EnrichmentStats> {
31 let mut stats = EnrichmentStats::default();
32
33 let output = std::process::Command::new("git")
34 .args([
35 "log",
36 &format!("-{max_commits}"),
37 "--format=%H%n%h%n%an%n%ai%n%s",
38 "--name-only",
39 ])
40 .current_dir(project_root)
41 .output();
42
43 let output = match output {
44 Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
45 _ => return Ok(stats),
46 };
47
48 let commits = parse_git_log(&output);
49 for commit in &commits {
50 let commit_node =
51 Node::commit(&commit.short_hash, &commit.message).with_metadata(&format!(
52 "{{\"author\":\"{}\",\"date\":\"{}\",\"hash\":\"{}\"}}",
53 commit.author, commit.date, commit.hash
54 ));
55
56 let commit_id = graph.upsert_node(&commit_node)?;
57 stats.commits_indexed += 1;
58
59 for file in &commit.files_changed {
60 if let Some(file_node) = graph.get_node_by_path(file)? {
61 if let Some(file_id) = file_node.id {
62 graph.upsert_edge(&Edge::new(file_id, commit_id, EdgeKind::ChangedIn))?;
63 stats.edges_created += 1;
64 }
65 }
66 }
67 }
68
69 Ok(stats)
70}
71
72fn parse_git_log(output: &str) -> Vec<CommitInfo> {
73 let mut commits = Vec::new();
74 let mut lines = output.lines().peekable();
75
76 while lines.peek().is_some() {
77 let hash = match lines.next() {
78 Some(h) if !h.is_empty() && h.len() >= 7 => h.to_string(),
79 _ => {
80 lines.next();
81 continue;
82 }
83 };
84
85 let short_hash = match lines.next() {
86 Some(s) => s.to_string(),
87 None => break,
88 };
89 let author = match lines.next() {
90 Some(a) => a.to_string(),
91 None => break,
92 };
93 let date = match lines.next() {
94 Some(d) => d.to_string(),
95 None => break,
96 };
97 let message = match lines.next() {
98 Some(m) => m.to_string(),
99 None => break,
100 };
101
102 let mut files_changed = Vec::new();
103 while let Some(line) = lines.peek() {
104 if line.is_empty() {
105 lines.next();
106 break;
107 }
108 files_changed.push(line.to_string());
109 lines.next();
110 }
111
112 commits.push(CommitInfo {
113 hash,
114 short_hash,
115 author,
116 date,
117 message,
118 files_changed,
119 });
120 }
121
122 commits
123}
124
/// Lowercase path fragments that identify a file as test code.
///
/// Covers suffix-style names (`foo_test.rs`, `foo.test.ts`, `foo.spec.js`,
/// `foo_spec.rb`), the `test_` prefix, and the `tests/` and `__tests__/`
/// directories.
const TEST_PATTERNS: &[&str] = &[
    "_test.", "test_", ".test.", ".spec.", "_spec.", "tests/", "__tests__/",
];
138
139pub fn index_tests(graph: &CodeGraph, project_root: &Path) -> anyhow::Result<EnrichmentStats> {
140 let mut stats = EnrichmentStats::default();
141
142 let output = std::process::Command::new("git")
143 .args(["ls-files"])
144 .current_dir(project_root)
145 .output();
146
147 let files: Vec<String> = match output {
148 Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout)
149 .lines()
150 .map(ToString::to_string)
151 .collect(),
152 _ => return Ok(stats),
153 };
154
155 for file in &files {
156 if !is_test_file(file) {
157 continue;
158 }
159
160 let test_node = Node::test(file, file);
161 let test_id = graph.upsert_node(&test_node)?;
162 stats.tests_indexed += 1;
163
164 let tested_file = infer_tested_file(file);
165 if let Some(ref tested) = tested_file {
166 if files.contains(tested) {
167 let target_node = graph.get_node_by_path(tested)?;
168 if let Some(target) = target_node {
169 if let Some(target_id) = target.id {
170 graph.upsert_edge(&Edge::new(target_id, test_id, EdgeKind::TestedBy))?;
171 stats.edges_created += 1;
172 }
173 } else {
174 let file_id = graph.upsert_node(&Node::file(tested))?;
175 graph.upsert_edge(&Edge::new(file_id, test_id, EdgeKind::TestedBy))?;
176 stats.edges_created += 1;
177 }
178 }
179 }
180 }
181
182 Ok(stats)
183}
184
185fn is_test_file(path: &str) -> bool {
186 let lower = path.to_lowercase();
187 TEST_PATTERNS.iter().any(|p| lower.contains(p))
188}
189
/// Guesses which source file a test file exercises, based on its name alone.
///
/// Two naming schemes are recognised:
///
/// * an infix marker (`_test.`, `.test.`, `_spec.`, `.spec.`), which is removed
///   from the file name: `src/utils_test.rs` becomes `src/utils.rs`;
/// * a `test_` prefix, which is stripped: `tests/test_parser.py` becomes
///   `tests/parser.py`.
///
/// The candidate always lives in the same directory as the test file and uses
/// `/` as separator. Whether it exists is for the caller to check.
///
/// Returns `None` when the name matches neither scheme or when removing the
/// marker would leave an empty file name.
fn infer_tested_file(test_path: &str) -> Option<String> {
    const INFIX_MARKERS: [&str; 4] = ["_test.", ".test.", "_spec.", ".spec."];

    let path = Path::new(test_path);
    let name = path.file_name()?.to_str()?;
    let parent = path.parent()?;

    // Places `file_name` next to the test file, normalising separators.
    let sibling = |file_name: &str| -> Option<String> {
        parent
            .join(file_name)
            .to_str()
            .map(|joined| joined.replace('\\', "/"))
    };

    for marker in INFIX_MARKERS.iter() {
        if let Some(pos) = name.find(marker) {
            let base = &name[..pos];
            if base.is_empty() {
                // A name such as `_test.rs` has nothing left once the marker
                // is removed; try the remaining schemes instead.
                continue;
            }
            // Keep the marker's trailing dot so the extension stays attached.
            let ext = &name[pos + marker.len() - 1..];
            return sibling(&format!("{base}{ext}"));
        }
    }

    match name.strip_prefix("test_") {
        Some(base) if !base.is_empty() => sibling(base),
        _ => None,
    }
}
221
222pub fn index_knowledge(graph: &CodeGraph, project_root: &str) -> anyhow::Result<EnrichmentStats> {
227 let mut stats = EnrichmentStats::default();
228
229 let knowledge = crate::core::knowledge::ProjectKnowledge::load(project_root);
230 let Some(knowledge) = knowledge else {
231 return Ok(stats);
232 };
233
234 let mut mentioned_files: HashSet<String> = HashSet::new();
235
236 for fact in &knowledge.facts {
237 let node = Node::knowledge(&fact.key, &format!("[{}] {}", fact.category, fact.value));
238 let knowledge_id = graph.upsert_node(&node)?;
239 stats.knowledge_indexed += 1;
240
241 for file_ref in extract_file_refs(&fact.value) {
242 if mentioned_files.insert(format!("{}:{}", fact.key, file_ref)) {
243 if let Some(file_node) = graph.get_node_by_path(&file_ref)? {
244 if let Some(file_id) = file_node.id {
245 graph.upsert_edge(&Edge::new(
246 file_id,
247 knowledge_id,
248 EdgeKind::MentionedIn,
249 ))?;
250 stats.edges_created += 1;
251 }
252 }
253 }
254 }
255 }
256
257 Ok(stats)
258}
259
260fn extract_file_refs(text: &str) -> Vec<String> {
261 let mut refs = Vec::new();
262 for word in text.split_whitespace() {
263 let cleaned = word.trim_matches(|c: char| c == '`' || c == '\'' || c == '"' || c == ',');
264 if looks_like_file_path(cleaned) {
265 refs.push(cleaned.to_string());
266 }
267 }
268 refs
269}
270
/// Heuristically decides whether `s` denotes a file path.
///
/// A candidate must be between 4 and 200 bytes long and must have a file
/// extension. It is then accepted if it contains a path separator (`/` or
/// `\`), or if its extension, compared ASCII-case-insensitively, is one of a
/// fixed set of source-code extensions.
fn looks_like_file_path(s: &str) -> bool {
    const SOURCE_EXTENSIONS: [&str; 15] = [
        "rs", "ts", "py", "js", "go", "java", "tsx", "jsx", "rb", "c", "cpp", "h", "cs", "swift",
        "kt",
    ];

    if !(4..=200).contains(&s.len()) {
        return false;
    }

    let Some(extension) = Path::new(s).extension().and_then(|e| e.to_str()) else {
        return false;
    };

    // Anything with both a separator and an extension counts as a path.
    if s.contains(|c: char| matches!(c, '/' | '\\')) {
        return true;
    }

    let extension = extension.to_ascii_lowercase();
    SOURCE_EXTENSIONS.contains(&extension.as_str())
}
302
/// Counters describing what an enrichment pass added to the graph.
///
/// `Default` yields all-zero counters, which is the starting point for every
/// indexing step.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct EnrichmentStats {
    /// Number of commit nodes upserted.
    pub commits_indexed: usize,
    /// Number of test nodes upserted.
    pub tests_indexed: usize,
    /// Number of knowledge nodes upserted.
    pub knowledge_indexed: usize,
    /// Number of edges upserted.
    pub edges_created: usize,
}

impl EnrichmentStats {
    /// Adds every counter of `other` to the matching counter of `self`.
    pub fn merge(&mut self, other: &Self) {
        // Exhaustive destructuring: adding a counter to the struct without
        // merging it here becomes a compile error instead of a silent omission.
        let Self {
            commits_indexed,
            tests_indexed,
            knowledge_indexed,
            edges_created,
        } = *other;

        self.commits_indexed += commits_indexed;
        self.tests_indexed += tests_indexed;
        self.knowledge_indexed += knowledge_indexed;
        self.edges_created += edges_created;
    }

    /// Renders the counters as a single human-readable line.
    #[must_use]
    pub fn format_summary(&self) -> String {
        format!(
            "Graph enriched: {} commits, {} tests, {} knowledge entries, {} edges",
            self.commits_indexed, self.tests_indexed, self.knowledge_indexed, self.edges_created
        )
    }
}
330
331pub fn enrich_graph(
332 graph: &CodeGraph,
333 project_root: &Path,
334 max_commits: usize,
335) -> anyhow::Result<EnrichmentStats> {
336 let mut total = EnrichmentStats::default();
337
338 let git_stats = index_git_history(graph, project_root, max_commits)?;
339 total.merge(&git_stats);
340
341 let test_stats = index_tests(graph, project_root)?;
342 total.merge(&test_stats);
343
344 if let Some(root_str) = project_root.to_str() {
345 let knowledge_stats = index_knowledge(graph, root_str)?;
346 total.merge(&knowledge_stats);
347
348 let callgraph_stats = consolidate_callgraph(graph, root_str)?;
349 total.merge(&callgraph_stats);
350 }
351
352 Ok(total)
353}
354
355fn consolidate_callgraph(graph: &CodeGraph, project_root: &str) -> anyhow::Result<EnrichmentStats> {
356 let mut stats = EnrichmentStats::default();
357
358 let index = crate::core::graph_index::load_or_build(project_root);
359 let call_graph = crate::core::call_graph::CallGraph::load_or_build(project_root, &index);
360
361 let callee_to_file: std::collections::HashMap<&str, &str> = index
362 .symbols
363 .values()
364 .map(|s| (s.name.as_str(), s.file.as_str()))
365 .collect();
366
367 for edge in &call_graph.edges {
368 let from_file = &edge.caller_file;
369 let to_file = match callee_to_file.get(edge.callee_name.as_str()) {
370 Some(f) => *f,
371 None => continue,
372 };
373
374 if from_file == to_file {
375 continue;
376 }
377
378 let from_node = graph.get_node_by_path(from_file)?;
379 let to_node = graph.get_node_by_path(to_file)?;
380
381 if let (Some(from_n), Some(to_n)) = (from_node, to_node) {
382 if let (Some(from_id), Some(to_id)) = (from_n.id, to_n.id) {
383 graph.upsert_edge(&Edge::new(from_id, to_id, EdgeKind::Calls))?;
384 stats.edges_created += 1;
385 }
386 }
387 }
388
389 Ok(stats)
390}
391
/// Unit tests for the parsing and heuristic helpers, plus smoke tests for the
/// node and edge kinds this module writes to the graph.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::NodeKind;

    /// A single commit with two files is parsed into one record.
    #[test]
    fn parse_git_log_basic() {
        let raw = [
            "abc1234567890abcdef1234567890abcdef12345678",
            "abc1234",
            "John Doe",
            "2026-04-28 12:00:00 +0200",
            "feat: add feature",
            "src/main.rs",
            "src/lib.rs",
            "",
            "",
        ]
        .join("\n");

        let parsed = parse_git_log(&raw);

        assert_eq!(parsed.len(), 1);
        let first = &parsed[0];
        assert_eq!(first.short_hash, "abc1234");
        assert_eq!(first.author, "John Doe");
        assert_eq!(first.files_changed.len(), 2);
    }

    /// Two blank-line separated commits yield two records.
    #[test]
    fn parse_git_log_multiple() {
        let raw = [
            "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2",
            "a1b2c3d",
            "Alice",
            "2026-04-27",
            "first",
            "file1.rs",
            "",
            "f6e5d4c3b2a1f6e5d4c3b2a1f6e5d4c3b2a1f6e5",
            "f6e5d4c",
            "Bob",
            "2026-04-28",
            "second",
            "file2.rs",
            "file3.rs",
            "",
            "",
        ]
        .join("\n");

        let parsed = parse_git_log(&raw);

        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[1].files_changed.len(), 2);
    }

    /// Known test-file naming schemes are recognised; plain sources are not.
    #[test]
    fn is_test_file_detection() {
        let test_paths = [
            "src/utils_test.rs",
            "tests/integration.rs",
            "src/component.test.ts",
            "src/component.spec.js",
            "__tests__/app.js",
        ];
        for path in test_paths.iter() {
            assert!(is_test_file(path), "{path} should be a test file");
        }

        let source_paths = ["src/main.rs", "src/utils.rs"];
        for path in source_paths.iter() {
            assert!(!is_test_file(path), "{path} should not be a test file");
        }
    }

    /// Infix markers are removed to obtain the tested file.
    #[test]
    fn infer_tested_file_from_test() {
        let cases = [
            ("src/utils_test.rs", "src/utils.rs"),
            ("src/component.test.ts", "src/component.ts"),
            ("src/app.spec.js", "src/app.js"),
        ];
        for (test_path, expected) in cases.iter() {
            assert_eq!(infer_tested_file(test_path), Some(expected.to_string()));
        }
    }

    /// The `test_` prefix is stripped to obtain the tested file.
    #[test]
    fn infer_tested_file_prefix() {
        let inferred = infer_tested_file("tests/test_parser.py");
        assert_eq!(inferred, Some("tests/parser.py".to_string()));
    }

    /// Paths and known source files are accepted; bare words are rejected.
    #[test]
    fn looks_like_file_path_detection() {
        for accepted in ["src/main.rs", "core/utils.ts", "main.py"].iter() {
            assert!(looks_like_file_path(accepted), "{accepted} should be accepted");
        }
        for rejected in ["hello", "a.b", ".hidden"].iter() {
            assert!(!looks_like_file_path(rejected), "{rejected} should be rejected");
        }
    }

    /// Quoted and bare paths inside prose are both extracted.
    #[test]
    fn extract_file_refs_from_text() {
        let found = extract_file_refs("Changed `src/main.rs` and core/utils.ts for the fix");
        assert!(found.contains(&"src/main.rs".to_string()));
        assert!(found.contains(&"core/utils.ts".to_string()));
    }

    /// Merging adds the counters of the argument to the receiver.
    #[test]
    fn enrichment_stats_merge() {
        let mut total = EnrichmentStats {
            commits_indexed: 5,
            tests_indexed: 3,
            knowledge_indexed: 2,
            edges_created: 10,
        };
        let increment = EnrichmentStats {
            commits_indexed: 2,
            tests_indexed: 1,
            knowledge_indexed: 0,
            edges_created: 4,
        };

        total.merge(&increment);

        assert_eq!(total.commits_indexed, 7);
        assert_eq!(total.edges_created, 14);
    }

    /// The summary mentions the individual counters.
    #[test]
    fn enrichment_stats_format() {
        let summary = EnrichmentStats {
            commits_indexed: 10,
            tests_indexed: 5,
            knowledge_indexed: 3,
            edges_created: 20,
        }
        .format_summary();

        assert!(summary.contains("10 commits"));
        assert!(summary.contains("5 tests"));
    }

    /// Commit nodes carry the commit kind and are named after the short hash.
    #[test]
    fn commit_node_construction() {
        let commit = Node::commit("abc1234", "feat: add feature");
        assert_eq!(commit.kind, NodeKind::Commit);
        assert_eq!(commit.name, "abc1234");
    }

    /// Test nodes carry the test kind and remember their file path.
    #[test]
    fn test_node_construction() {
        let test_node = Node::test("src/utils_test.rs", "src/utils_test.rs");
        assert_eq!(test_node.kind, NodeKind::Test);
        assert_eq!(test_node.file_path, "src/utils_test.rs");
    }

    /// Knowledge nodes carry the knowledge kind and keep their text as metadata.
    #[test]
    fn knowledge_node_construction() {
        let fact = Node::knowledge("k1", "Database uses PostgreSQL");
        assert_eq!(fact.kind, NodeKind::Knowledge);
        let metadata = fact.metadata.unwrap();
        assert!(metadata.contains("PostgreSQL"));
    }

    /// A file can be linked to a commit with a `ChangedIn` edge.
    #[test]
    fn graph_commit_and_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let file = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let commit = graph
            .upsert_node(&Node::commit("abc1234", "fix bug"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(file, commit, EdgeKind::ChangedIn))
            .unwrap();

        let outgoing = graph.edges_from(file).unwrap();
        assert_eq!(outgoing.len(), 1);
        assert_eq!(outgoing[0].kind, EdgeKind::ChangedIn);
    }

    /// A file can be linked to its test with a `TestedBy` edge.
    #[test]
    fn graph_test_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let code = graph.upsert_node(&Node::file("src/utils.rs")).unwrap();
        let test = graph
            .upsert_node(&Node::test("src/utils_test.rs", "test_parse"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(code, test, EdgeKind::TestedBy))
            .unwrap();

        let outgoing = graph.edges_from(code).unwrap();
        assert_eq!(outgoing[0].kind, EdgeKind::TestedBy);
    }

    /// A file can be linked to a knowledge entry with a `MentionedIn` edge.
    #[test]
    fn graph_knowledge_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let file = graph.upsert_node(&Node::file("src/db.rs")).unwrap();
        let fact = graph
            .upsert_node(&Node::knowledge("db_type", "Uses PostgreSQL"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(file, fact, EdgeKind::MentionedIn))
            .unwrap();

        let outgoing = graph.edges_from(file).unwrap();
        assert_eq!(outgoing[0].kind, EdgeKind::MentionedIn);
    }
}