1use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node};
9use std::collections::HashSet;
10use std::path::Path;
11
/// Metadata for a single commit as parsed from `git log` output.
///
/// The first five fields mirror the header placeholders requested by
/// `index_git_history` (`%H`, `%h`, `%an`, `%ai`, `%s`); `files_changed`
/// holds the paths listed by `--name-only`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitInfo {
    /// Full commit hash (`%H`).
    pub hash: String,
    /// Abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// Author name (`%an`).
    pub author: String,
    /// Author date (`%ai`).
    pub date: String,
    /// Subject line of the commit message (`%s`).
    pub message: String,
    /// Paths reported for this commit, in the order git printed them.
    pub files_changed: Vec<String>,
}
25
26pub fn index_git_history(
27 graph: &CodeGraph,
28 project_root: &Path,
29 max_commits: usize,
30) -> anyhow::Result<EnrichmentStats> {
31 let mut stats = EnrichmentStats::default();
32
33 let output = std::process::Command::new("git")
34 .args([
35 "log",
36 &format!("-{max_commits}"),
37 "--format=%H%n%h%n%an%n%ai%n%s",
38 "--name-only",
39 ])
40 .current_dir(project_root)
41 .output();
42
43 let output = match output {
44 Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
45 _ => return Ok(stats),
46 };
47
48 let commits = parse_git_log(&output);
49 for commit in &commits {
50 let commit_node =
51 Node::commit(&commit.short_hash, &commit.message).with_metadata(&format!(
52 "{{\"author\":\"{}\",\"date\":\"{}\",\"hash\":\"{}\"}}",
53 commit.author, commit.date, commit.hash
54 ));
55
56 let commit_id = graph.upsert_node(&commit_node)?;
57 stats.commits_indexed += 1;
58
59 for file in &commit.files_changed {
60 if let Some(file_node) = graph.get_node_by_path(file)? {
61 if let Some(file_id) = file_node.id {
62 graph.upsert_edge(&Edge::new(file_id, commit_id, EdgeKind::ChangedIn))?;
63 stats.edges_created += 1;
64 }
65 }
66 }
67 }
68
69 Ok(stats)
70}
71
72fn parse_git_log(output: &str) -> Vec<CommitInfo> {
73 let mut commits = Vec::new();
74 let mut lines = output.lines().peekable();
75
76 while lines.peek().is_some() {
77 let hash = match lines.next() {
78 Some(h) if !h.is_empty() && h.len() >= 7 => h.to_string(),
79 _ => {
80 lines.next();
81 continue;
82 }
83 };
84
85 let short_hash = match lines.next() {
86 Some(s) => s.to_string(),
87 None => break,
88 };
89 let author = match lines.next() {
90 Some(a) => a.to_string(),
91 None => break,
92 };
93 let date = match lines.next() {
94 Some(d) => d.to_string(),
95 None => break,
96 };
97 let message = match lines.next() {
98 Some(m) => m.to_string(),
99 None => break,
100 };
101
102 let mut files_changed = Vec::new();
103 while let Some(line) = lines.peek() {
104 if line.is_empty() {
105 lines.next();
106 break;
107 }
108 files_changed.push(line.to_string());
109 lines.next();
110 }
111
112 commits.push(CommitInfo {
113 hash,
114 short_hash,
115 author,
116 date,
117 message,
118 files_changed,
119 });
120 }
121
122 commits
123}
124
/// Lowercase path fragments that `is_test_file` looks for when deciding
/// whether a path belongs to a test.
const TEST_PATTERNS: &[&str] = &[
    // File-name markers.
    "_test.",
    "test_",
    ".test.",
    ".spec.",
    "_spec.",
    // Directory markers.
    "tests/",
    "__tests__/",
];
138
139pub fn index_tests(graph: &CodeGraph, project_root: &Path) -> anyhow::Result<EnrichmentStats> {
140 let mut stats = EnrichmentStats::default();
141
142 let output = std::process::Command::new("git")
143 .args(["ls-files"])
144 .current_dir(project_root)
145 .output();
146
147 let files: Vec<String> = match output {
148 Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout)
149 .lines()
150 .map(ToString::to_string)
151 .collect(),
152 _ => return Ok(stats),
153 };
154
155 for file in &files {
156 if !is_test_file(file) {
157 continue;
158 }
159
160 let test_node = Node::test(file, file);
161 let test_id = graph.upsert_node(&test_node)?;
162 stats.tests_indexed += 1;
163
164 let tested_file = infer_tested_file(file);
165 if let Some(ref tested) = tested_file {
166 if files.contains(tested) {
167 let target_node = graph.get_node_by_path(tested)?;
168 if let Some(target) = target_node {
169 if let Some(target_id) = target.id {
170 graph.upsert_edge(&Edge::new(target_id, test_id, EdgeKind::TestedBy))?;
171 stats.edges_created += 1;
172 }
173 } else {
174 let file_id = graph.upsert_node(&Node::file(tested))?;
175 graph.upsert_edge(&Edge::new(file_id, test_id, EdgeKind::TestedBy))?;
176 stats.edges_created += 1;
177 }
178 }
179 }
180 }
181
182 Ok(stats)
183}
184
185fn is_test_file(path: &str) -> bool {
186 let lower = path.to_lowercase();
187 TEST_PATTERNS.iter().any(|p| lower.contains(p))
188}
189
/// Guesses which file a test file exercises, based on its name alone.
///
/// * `name_test.ext`, `name.test.ext`, `name_spec.ext` and `name.spec.ext`
///   map to `name.ext` in the same directory.
/// * `test_name.ext` maps to `name.ext` in the same directory.
///
/// Only a sibling in the test's own directory is ever proposed; the caller
/// decides whether that path exists. Returns `None` when the name follows none
/// of these conventions, when nothing would be left of the name after removing
/// the marker, or when the path has no UTF-8 file name. Backslashes in the
/// result are replaced by `/`.
fn infer_tested_file(test_path: &str) -> Option<String> {
    const SUFFIX_MARKERS: [&str; 4] = ["_test.", ".test.", "_spec.", ".spec."];

    let path = Path::new(test_path);
    let name = path.file_name()?.to_str()?;
    let parent = path.parent()?;

    let from_suffix = SUFFIX_MARKERS.iter().find_map(|marker| {
        let pos = name.find(*marker)?;
        let base = &name[..pos];
        if base.is_empty() {
            return None;
        }
        // Every marker ends with '.', which is kept as the start of the extension.
        let ext = &name[pos + marker.len() - 1..];
        Some(format!("{base}{ext}"))
    });

    let source_name = match from_suffix {
        Some(candidate) => candidate,
        None => name
            .strip_prefix("test_")
            .filter(|rest| !rest.is_empty())?
            .to_string(),
    };

    parent
        .join(source_name)
        .to_str()
        .map(|s| s.replace('\\', "/"))
}
221
222pub fn index_knowledge(graph: &CodeGraph, project_root: &str) -> anyhow::Result<EnrichmentStats> {
227 let mut stats = EnrichmentStats::default();
228
229 let knowledge = crate::core::knowledge::ProjectKnowledge::load(project_root);
230 let Some(knowledge) = knowledge else {
231 return Ok(stats);
232 };
233
234 let mut mentioned_files: HashSet<String> = HashSet::new();
235
236 for fact in &knowledge.facts {
237 let node = Node::knowledge(&fact.key, &format!("[{}] {}", fact.category, fact.value));
238 let knowledge_id = graph.upsert_node(&node)?;
239 stats.knowledge_indexed += 1;
240
241 for file_ref in extract_file_refs(&fact.value) {
242 if mentioned_files.insert(format!("{}:{}", fact.key, file_ref)) {
243 if let Some(file_node) = graph.get_node_by_path(&file_ref)? {
244 if let Some(file_id) = file_node.id {
245 graph.upsert_edge(&Edge::new(
246 file_id,
247 knowledge_id,
248 EdgeKind::MentionedIn,
249 ))?;
250 stats.edges_created += 1;
251 }
252 }
253 }
254 }
255 }
256
257 Ok(stats)
258}
259
260fn extract_file_refs(text: &str) -> Vec<String> {
261 let mut refs = Vec::new();
262 for word in text.split_whitespace() {
263 let cleaned = word.trim_matches(|c: char| c == '`' || c == '\'' || c == '"' || c == ',');
264 if looks_like_file_path(cleaned) {
265 refs.push(cleaned.to_string());
266 }
267 }
268 refs
269}
270
/// Heuristically decides whether `s` could be a file path.
///
/// The string must be 4 to 200 bytes long and have a file extension. It is
/// accepted when it contains a path separator (`/` or `\`), or otherwise when
/// its extension, compared case-insensitively, is one of a fixed list of
/// source-code extensions.
fn looks_like_file_path(s: &str) -> bool {
    const SOURCE_EXTENSIONS: [&str; 15] = [
        "rs", "ts", "py", "js", "go", "java", "tsx", "jsx", "rb", "c", "cpp", "h", "cs", "swift",
        "kt",
    ];

    if !(4..=200).contains(&s.len()) {
        return false;
    }

    let Some(extension) = Path::new(s).extension().and_then(|e| e.to_str()) else {
        return false;
    };

    // With a separator present any extension is good enough.
    if s.contains(|c: char| c == '/' || c == '\\') {
        return true;
    }

    let extension = extension.to_ascii_lowercase();
    SOURCE_EXTENSIONS.contains(&extension.as_str())
}
302
/// Counters describing what an enrichment pass added to the graph.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct EnrichmentStats {
    /// Number of commit nodes upserted.
    pub commits_indexed: usize,
    /// Number of test nodes upserted.
    pub tests_indexed: usize,
    /// Number of knowledge nodes upserted.
    pub knowledge_indexed: usize,
    /// Number of edges upserted across all passes.
    pub edges_created: usize,
}

impl EnrichmentStats {
    /// Adds every counter of `other` onto the matching counter of `self`.
    pub fn merge(&mut self, other: &Self) {
        let Self {
            commits_indexed,
            tests_indexed,
            knowledge_indexed,
            edges_created,
        } = other;

        self.commits_indexed += commits_indexed;
        self.tests_indexed += tests_indexed;
        self.knowledge_indexed += knowledge_indexed;
        self.edges_created += edges_created;
    }

    /// Renders the counters as a one-line, human-readable summary.
    pub fn format_summary(&self) -> String {
        let Self {
            commits_indexed,
            tests_indexed,
            knowledge_indexed,
            edges_created,
        } = self;

        format!(
            "Graph enriched: {} commits, {} tests, {} knowledge entries, {} edges",
            commits_indexed, tests_indexed, knowledge_indexed, edges_created
        )
    }
}
330
331pub fn enrich_graph(
332 graph: &CodeGraph,
333 project_root: &Path,
334 max_commits: usize,
335) -> anyhow::Result<EnrichmentStats> {
336 let mut total = EnrichmentStats::default();
337
338 let git_stats = index_git_history(graph, project_root, max_commits)?;
339 total.merge(&git_stats);
340
341 let test_stats = index_tests(graph, project_root)?;
342 total.merge(&test_stats);
343
344 if let Some(root_str) = project_root.to_str() {
345 let knowledge_stats = index_knowledge(graph, root_str)?;
346 total.merge(&knowledge_stats);
347 }
348
349 Ok(total)
350}
351
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::NodeKind;

    // ---- git log parsing -------------------------------------------------

    #[test]
    fn parse_git_log_basic() {
        let log = concat!(
            "abc1234567890abcdef1234567890abcdef12345678\n",
            "abc1234\n",
            "John Doe\n",
            "2026-04-28 12:00:00 +0200\n",
            "feat: add feature\n",
            "src/main.rs\n",
            "src/lib.rs\n",
            "\n",
        );

        let parsed = parse_git_log(log);

        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.short_hash, "abc1234");
        assert_eq!(only.author, "John Doe");
        assert_eq!(only.files_changed.len(), 2);
    }

    #[test]
    fn parse_git_log_multiple() {
        let log = concat!(
            "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2\n",
            "a1b2c3d\n",
            "Alice\n",
            "2026-04-27\n",
            "first\n",
            "file1.rs\n",
            "\n",
            "f6e5d4c3b2a1f6e5d4c3b2a1f6e5d4c3b2a1f6e5\n",
            "f6e5d4c\n",
            "Bob\n",
            "2026-04-28\n",
            "second\n",
            "file2.rs\n",
            "file3.rs\n",
            "\n",
        );

        let parsed = parse_git_log(log);

        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[1].files_changed.len(), 2);
    }

    // ---- test-file heuristics ---------------------------------------------

    #[test]
    fn is_test_file_detection() {
        let test_paths = [
            "src/utils_test.rs",
            "tests/integration.rs",
            "src/component.test.ts",
            "src/component.spec.js",
            "__tests__/app.js",
        ];
        for path in test_paths {
            assert!(is_test_file(path), "{path} should be recognised as a test");
        }

        let regular_paths = ["src/main.rs", "src/utils.rs"];
        for path in regular_paths {
            assert!(!is_test_file(path), "{path} should not be recognised as a test");
        }
    }

    #[test]
    fn infer_tested_file_from_test() {
        let cases = [
            ("src/utils_test.rs", "src/utils.rs"),
            ("src/component.test.ts", "src/component.ts"),
            ("src/app.spec.js", "src/app.js"),
        ];
        for (test_path, expected) in cases {
            assert_eq!(infer_tested_file(test_path), Some(expected.to_string()));
        }
    }

    #[test]
    fn infer_tested_file_prefix() {
        let inferred = infer_tested_file("tests/test_parser.py");
        assert_eq!(inferred, Some("tests/parser.py".to_string()));
    }

    // ---- file-reference extraction ----------------------------------------

    #[test]
    fn looks_like_file_path_detection() {
        for accepted in ["src/main.rs", "core/utils.ts", "main.py"] {
            assert!(looks_like_file_path(accepted));
        }
        for rejected in ["hello", "a.b", ".hidden"] {
            assert!(!looks_like_file_path(rejected));
        }
    }

    #[test]
    fn extract_file_refs_from_text() {
        let found = extract_file_refs("Changed `src/main.rs` and core/utils.ts for the fix");
        assert!(found.contains(&"src/main.rs".to_string()));
        assert!(found.contains(&"core/utils.ts".to_string()));
    }

    // ---- statistics -------------------------------------------------------

    #[test]
    fn enrichment_stats_merge() {
        let mut total = EnrichmentStats {
            commits_indexed: 5,
            tests_indexed: 3,
            knowledge_indexed: 2,
            edges_created: 10,
        };
        let extra = EnrichmentStats {
            commits_indexed: 2,
            tests_indexed: 1,
            knowledge_indexed: 0,
            edges_created: 4,
        };

        total.merge(&extra);

        assert_eq!(total.commits_indexed, 7);
        assert_eq!(total.edges_created, 14);
    }

    #[test]
    fn enrichment_stats_format() {
        let summary = EnrichmentStats {
            commits_indexed: 10,
            tests_indexed: 5,
            knowledge_indexed: 3,
            edges_created: 20,
        }
        .format_summary();

        assert!(summary.contains("10 commits"));
        assert!(summary.contains("5 tests"));
    }

    // ---- node constructors ------------------------------------------------

    #[test]
    fn commit_node_construction() {
        let commit = Node::commit("abc1234", "feat: add feature");
        assert_eq!(commit.kind, NodeKind::Commit);
        assert_eq!(commit.name, "abc1234");
    }

    #[test]
    fn test_node_construction() {
        let test_node = Node::test("src/utils_test.rs", "src/utils_test.rs");
        assert_eq!(test_node.kind, NodeKind::Test);
        assert_eq!(test_node.file_path, "src/utils_test.rs");
    }

    #[test]
    fn knowledge_node_construction() {
        let fact = Node::knowledge("k1", "Database uses PostgreSQL");
        assert_eq!(fact.kind, NodeKind::Knowledge);
        assert!(fact.metadata.unwrap().contains("PostgreSQL"));
    }

    // ---- graph round trips ------------------------------------------------

    #[test]
    fn graph_commit_and_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let file_id = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let commit_id = graph
            .upsert_node(&Node::commit("abc1234", "fix bug"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(file_id, commit_id, EdgeKind::ChangedIn))
            .unwrap();

        let outgoing = graph.edges_from(file_id).unwrap();
        assert_eq!(outgoing.len(), 1);
        assert_eq!(outgoing[0].kind, EdgeKind::ChangedIn);
    }

    #[test]
    fn graph_test_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let code_id = graph.upsert_node(&Node::file("src/utils.rs")).unwrap();
        let test_id = graph
            .upsert_node(&Node::test("src/utils_test.rs", "test_parse"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(code_id, test_id, EdgeKind::TestedBy))
            .unwrap();

        let outgoing = graph.edges_from(code_id).unwrap();
        assert_eq!(outgoing[0].kind, EdgeKind::TestedBy);
    }

    #[test]
    fn graph_knowledge_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let file_id = graph.upsert_node(&Node::file("src/db.rs")).unwrap();
        let knowledge_id = graph
            .upsert_node(&Node::knowledge("db_type", "Uses PostgreSQL"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(file_id, knowledge_id, EdgeKind::MentionedIn))
            .unwrap();

        let outgoing = graph.edges_from(file_id).unwrap();
        assert_eq!(outgoing[0].kind, EdgeKind::MentionedIn);
    }
}
525}