Skip to main content

lean_ctx/core/
graph_enricher.rs

1//! Unified Graph Enricher — indexes Git history, tests, and knowledge into the PropertyGraph.
2//!
3//! Three enrichment passes:
4//! 1. **Git commits**: `git log` → Commit nodes + `changed_in` edges
5//! 2. **Test files**: naming/annotation heuristics → Test nodes + `tested_by` edges
6//! 3. **Knowledge bridge**: `ctx_knowledge` facts → Knowledge nodes + `mentioned_in` edges
7
8use crate::core::property_graph::{CodeGraph, Edge, EdgeKind, Node};
9use std::collections::HashSet;
10use std::path::Path;
11
12// ---------------------------------------------------------------------------
13// Git History Indexer
14// ---------------------------------------------------------------------------
15
/// One commit parsed from `git log` output, together with the paths it touched.
///
/// The header fields mirror the placeholders requested by `index_git_history`
/// (`%H`, `%h`, `%an`, `%ai`, `%s`), in that order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommitInfo {
    /// Full commit hash (`%H`).
    pub hash: String,
    /// Abbreviated commit hash (`%h`).
    pub short_hash: String,
    /// Author name (`%an`).
    pub author: String,
    /// Author date as printed by git (`%ai`).
    pub date: String,
    /// Subject line of the commit message (`%s`).
    pub message: String,
    /// Paths listed for this commit by `--name-only`.
    pub files_changed: Vec<String>,
}
25
26pub fn index_git_history(
27    graph: &CodeGraph,
28    project_root: &Path,
29    max_commits: usize,
30) -> anyhow::Result<EnrichmentStats> {
31    let mut stats = EnrichmentStats::default();
32
33    let output = std::process::Command::new("git")
34        .args([
35            "log",
36            &format!("-{max_commits}"),
37            "--format=%H%n%h%n%an%n%ai%n%s",
38            "--name-only",
39        ])
40        .current_dir(project_root)
41        .output();
42
43    let output = match output {
44        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).to_string(),
45        _ => return Ok(stats),
46    };
47
48    let commits = parse_git_log(&output);
49    for commit in &commits {
50        let commit_node =
51            Node::commit(&commit.short_hash, &commit.message).with_metadata(&format!(
52                "{{\"author\":\"{}\",\"date\":\"{}\",\"hash\":\"{}\"}}",
53                commit.author, commit.date, commit.hash
54            ));
55
56        let commit_id = graph.upsert_node(&commit_node)?;
57        stats.commits_indexed += 1;
58
59        for file in &commit.files_changed {
60            if let Some(file_node) = graph.get_node_by_path(file)? {
61                if let Some(file_id) = file_node.id {
62                    graph.upsert_edge(&Edge::new(file_id, commit_id, EdgeKind::ChangedIn))?;
63                    stats.edges_created += 1;
64                }
65            }
66        }
67    }
68
69    Ok(stats)
70}
71
72fn parse_git_log(output: &str) -> Vec<CommitInfo> {
73    let mut commits = Vec::new();
74    let mut lines = output.lines().peekable();
75
76    while lines.peek().is_some() {
77        let hash = match lines.next() {
78            Some(h) if !h.is_empty() && h.len() >= 7 => h.to_string(),
79            _ => {
80                lines.next();
81                continue;
82            }
83        };
84
85        let short_hash = match lines.next() {
86            Some(s) => s.to_string(),
87            None => break,
88        };
89        let author = match lines.next() {
90            Some(a) => a.to_string(),
91            None => break,
92        };
93        let date = match lines.next() {
94            Some(d) => d.to_string(),
95            None => break,
96        };
97        let message = match lines.next() {
98            Some(m) => m.to_string(),
99            None => break,
100        };
101
102        let mut files_changed = Vec::new();
103        while let Some(line) = lines.peek() {
104            if line.is_empty() {
105                lines.next();
106                break;
107            }
108            files_changed.push(line.to_string());
109            lines.next();
110        }
111
112        commits.push(CommitInfo {
113            hash,
114            short_hash,
115            author,
116            date,
117            message,
118            files_changed,
119        });
120    }
121
122    commits
123}
124
125// ---------------------------------------------------------------------------
126// Test Indexer
127// ---------------------------------------------------------------------------
128
/// Lowercase substrings that mark a path as test code.
///
/// `is_test_file` reports a match when the lowercased path contains any entry, so the
/// order of the entries does not affect the result.
const TEST_PATTERNS: &[&str] = &[
    // Directory conventions.
    "tests/",
    "__tests__/",
    // File-name prefix convention.
    "test_",
    // File-name infix conventions (`foo_test.rs`, `foo.spec.ts`, ...).
    "_test.",
    ".test.",
    "_spec.",
    ".spec.",
];
138
139pub fn index_tests(graph: &CodeGraph, project_root: &Path) -> anyhow::Result<EnrichmentStats> {
140    let mut stats = EnrichmentStats::default();
141
142    let output = std::process::Command::new("git")
143        .args(["ls-files"])
144        .current_dir(project_root)
145        .output();
146
147    let files: Vec<String> = match output {
148        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout)
149            .lines()
150            .map(ToString::to_string)
151            .collect(),
152        _ => return Ok(stats),
153    };
154
155    for file in &files {
156        if !is_test_file(file) {
157            continue;
158        }
159
160        let test_node = Node::test(file, file);
161        let test_id = graph.upsert_node(&test_node)?;
162        stats.tests_indexed += 1;
163
164        let tested_file = infer_tested_file(file);
165        if let Some(ref tested) = tested_file {
166            if files.contains(tested) {
167                let target_node = graph.get_node_by_path(tested)?;
168                if let Some(target) = target_node {
169                    if let Some(target_id) = target.id {
170                        graph.upsert_edge(&Edge::new(target_id, test_id, EdgeKind::TestedBy))?;
171                        stats.edges_created += 1;
172                    }
173                } else {
174                    let file_id = graph.upsert_node(&Node::file(tested))?;
175                    graph.upsert_edge(&Edge::new(file_id, test_id, EdgeKind::TestedBy))?;
176                    stats.edges_created += 1;
177                }
178            }
179        }
180    }
181
182    Ok(stats)
183}
184
185fn is_test_file(path: &str) -> bool {
186    let lower = path.to_lowercase();
187    TEST_PATTERNS.iter().any(|p| lower.contains(p))
188}
189
/// Guesses the path of the source file exercised by the test at `test_path`.
///
/// The guess is always a sibling of the test file:
///
/// * `foo_test.ext`, `foo.test.ext`, `foo_spec.ext`, `foo.spec.ext` → `foo.ext`
/// * `test_foo.ext` → `foo.ext`
///
/// Returns `None` when the file name follows none of these conventions. The result
/// uses `/` separators and is not checked for existence; that is left to the caller.
fn infer_tested_file(test_path: &str) -> Option<String> {
    let path = Path::new(test_path);
    let name = path.file_name()?.to_str()?;

    let source_name = ["_test.", ".test.", "_spec.", ".spec."]
        .iter()
        .find_map(|marker| {
            let pos = name.find(*marker)?;
            // Keep the marker's trailing '.' so it becomes the extension separator.
            let ext = &name[pos + marker.len() - 1..];
            Some(format!("{}{}", &name[..pos], ext))
        })
        .or_else(|| name.strip_prefix("test_").map(String::from))?;

    // `test_path` is valid UTF-8, so the joined path always converts back to `&str`.
    let candidate = path.parent()?.join(source_name);
    candidate.to_str().map(|s| s.replace('\\', "/"))
}
221
222// ---------------------------------------------------------------------------
223// Knowledge Bridge
224// ---------------------------------------------------------------------------
225
226pub fn index_knowledge(graph: &CodeGraph, project_root: &str) -> anyhow::Result<EnrichmentStats> {
227    let mut stats = EnrichmentStats::default();
228
229    let knowledge = crate::core::knowledge::ProjectKnowledge::load(project_root);
230    let Some(knowledge) = knowledge else {
231        return Ok(stats);
232    };
233
234    let mut mentioned_files: HashSet<String> = HashSet::new();
235
236    for fact in &knowledge.facts {
237        let node = Node::knowledge(&fact.key, &format!("[{}] {}", fact.category, fact.value));
238        let knowledge_id = graph.upsert_node(&node)?;
239        stats.knowledge_indexed += 1;
240
241        for file_ref in extract_file_refs(&fact.value) {
242            if mentioned_files.insert(format!("{}:{}", fact.key, file_ref)) {
243                if let Some(file_node) = graph.get_node_by_path(&file_ref)? {
244                    if let Some(file_id) = file_node.id {
245                        graph.upsert_edge(&Edge::new(
246                            file_id,
247                            knowledge_id,
248                            EdgeKind::MentionedIn,
249                        ))?;
250                        stats.edges_created += 1;
251                    }
252                }
253            }
254        }
255    }
256
257    Ok(stats)
258}
259
/// Extracts the whitespace-separated words of `text` that look like file paths.
///
/// Surrounding decoration is stripped before the check: quotes, backticks, commas and
/// opening brackets at the start; quotes, backticks, commas, closing brackets and
/// sentence punctuation (`.`, `;`, `:`, `!`, `?`) at the end. A leading `.` is kept so
/// that `./src/main.rs` and dot-directories survive.
fn extract_file_refs(text: &str) -> Vec<String> {
    const LEADING: &[char] = &['`', '\'', '"', ',', '(', '[', '{', '<'];
    const TRAILING: &[char] = &[
        '`', '\'', '"', ',', ')', ']', '}', '>', '.', ';', ':', '!', '?',
    ];

    text.split_whitespace()
        .map(|word| word.trim_start_matches(LEADING).trim_end_matches(TRAILING))
        .filter(|candidate| looks_like_file_path(candidate))
        .map(String::from)
        .collect()
}

/// Heuristically decides whether `s` is a file path.
///
/// `s` must be 4 to 200 bytes long and have a file extension. It then qualifies if it
/// contains a path separator (`/` or `\`) or if its extension, compared
/// case-insensitively, is one of a fixed list of source-code extensions.
fn looks_like_file_path(s: &str) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "py", "js", "go", "java", "tsx", "jsx", "rb", "c", "cpp", "h", "cs", "swift",
        "kt",
    ];

    if !(4..=200).contains(&s.len()) {
        return false;
    }
    let Some(ext) = Path::new(s).extension().and_then(|e| e.to_str()) else {
        return false;
    };

    s.contains('/')
        || s.contains('\\')
        || SOURCE_EXTENSIONS.contains(&ext.to_ascii_lowercase().as_str())
}
302
303// ---------------------------------------------------------------------------
304// Full enrichment pipeline
305// ---------------------------------------------------------------------------
306
/// Counters describing what one or more enrichment passes added to the graph.
#[derive(Debug, Default)]
pub struct EnrichmentStats {
    /// Number of commit nodes upserted.
    pub commits_indexed: usize,
    /// Number of test nodes upserted.
    pub tests_indexed: usize,
    /// Number of knowledge nodes upserted.
    pub knowledge_indexed: usize,
    /// Number of edges upserted across all passes.
    pub edges_created: usize,
}

impl EnrichmentStats {
    /// Adds every counter of `other` onto the matching counter of `self`.
    pub fn merge(&mut self, other: &Self) {
        let pairs = [
            (&mut self.commits_indexed, other.commits_indexed),
            (&mut self.tests_indexed, other.tests_indexed),
            (&mut self.knowledge_indexed, other.knowledge_indexed),
            (&mut self.edges_created, other.edges_created),
        ];
        for (mine, theirs) in pairs {
            *mine += theirs;
        }
    }

    /// Renders the counters as a one-line, human-readable summary.
    pub fn format_summary(&self) -> String {
        let Self {
            commits_indexed,
            tests_indexed,
            knowledge_indexed,
            edges_created,
        } = self;
        format!(
            "Graph enriched: {} commits, {} tests, {} knowledge entries, {} edges",
            commits_indexed, tests_indexed, knowledge_indexed, edges_created
        )
    }
}
330
331pub fn enrich_graph(
332    graph: &CodeGraph,
333    project_root: &Path,
334    max_commits: usize,
335) -> anyhow::Result<EnrichmentStats> {
336    let mut total = EnrichmentStats::default();
337
338    let git_stats = index_git_history(graph, project_root, max_commits)?;
339    total.merge(&git_stats);
340
341    let test_stats = index_tests(graph, project_root)?;
342    total.merge(&test_stats);
343
344    if let Some(root_str) = project_root.to_str() {
345        let knowledge_stats = index_knowledge(graph, root_str)?;
346        total.merge(&knowledge_stats);
347    }
348
349    Ok(total)
350}
351
352// ---------------------------------------------------------------------------
353// Tests
354// ---------------------------------------------------------------------------
355
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::property_graph::NodeKind;

    // --- git log parsing ---------------------------------------------------

    #[test]
    fn parse_git_log_basic() {
        let raw = "abc1234567890abcdef1234567890abcdef12345678\nabc1234\nJohn Doe\n2026-04-28 12:00:00 +0200\nfeat: add feature\nsrc/main.rs\nsrc/lib.rs\n\n";
        let parsed = parse_git_log(raw);

        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.short_hash, "abc1234");
        assert_eq!(only.author, "John Doe");
        assert_eq!(only.files_changed.len(), 2);
    }

    #[test]
    fn parse_git_log_multiple() {
        let raw = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2\na1b2c3d\nAlice\n2026-04-27\nfirst\nfile1.rs\n\nf6e5d4c3b2a1f6e5d4c3b2a1f6e5d4c3b2a1f6e5\nf6e5d4c\nBob\n2026-04-28\nsecond\nfile2.rs\nfile3.rs\n\n";
        let parsed = parse_git_log(raw);

        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[1].files_changed.len(), 2);
    }

    // --- test-file heuristics ------------------------------------------------

    #[test]
    fn is_test_file_detection() {
        let test_paths = [
            "src/utils_test.rs",
            "tests/integration.rs",
            "src/component.test.ts",
            "src/component.spec.js",
            "__tests__/app.js",
        ];
        for path in test_paths {
            assert!(is_test_file(path));
        }

        let regular_paths = ["src/main.rs", "src/utils.rs"];
        for path in regular_paths {
            assert!(!is_test_file(path));
        }
    }

    #[test]
    fn infer_tested_file_from_test() {
        let cases = [
            ("src/utils_test.rs", "src/utils.rs"),
            ("src/component.test.ts", "src/component.ts"),
            ("src/app.spec.js", "src/app.js"),
        ];
        for (test_path, expected) in cases {
            assert_eq!(infer_tested_file(test_path), Some(expected.to_string()));
        }
    }

    #[test]
    fn infer_tested_file_prefix() {
        let inferred = infer_tested_file("tests/test_parser.py");
        assert_eq!(inferred, Some("tests/parser.py".to_string()));
    }

    // --- knowledge file references ---------------------------------------------

    #[test]
    fn looks_like_file_path_detection() {
        for accepted in ["src/main.rs", "core/utils.ts", "main.py"] {
            assert!(looks_like_file_path(accepted));
        }
        for rejected in ["hello", "a.b", ".hidden"] {
            assert!(!looks_like_file_path(rejected));
        }
    }

    #[test]
    fn extract_file_refs_from_text() {
        let found = extract_file_refs("Changed `src/main.rs` and core/utils.ts for the fix");

        assert!(found.contains(&"src/main.rs".to_string()));
        assert!(found.contains(&"core/utils.ts".to_string()));
    }

    // --- stats -----------------------------------------------------------------

    #[test]
    fn enrichment_stats_merge() {
        let mut left = EnrichmentStats {
            commits_indexed: 5,
            tests_indexed: 3,
            knowledge_indexed: 2,
            edges_created: 10,
        };
        let right = EnrichmentStats {
            commits_indexed: 2,
            tests_indexed: 1,
            knowledge_indexed: 0,
            edges_created: 4,
        };

        left.merge(&right);

        assert_eq!(left.commits_indexed, 7);
        assert_eq!(left.edges_created, 14);
    }

    #[test]
    fn enrichment_stats_format() {
        let summary = EnrichmentStats {
            commits_indexed: 10,
            tests_indexed: 5,
            knowledge_indexed: 3,
            edges_created: 20,
        }
        .format_summary();

        assert!(summary.contains("10 commits"));
        assert!(summary.contains("5 tests"));
    }

    // --- node constructors -------------------------------------------------------

    #[test]
    fn commit_node_construction() {
        let commit = Node::commit("abc1234", "feat: add feature");
        assert_eq!(commit.kind, NodeKind::Commit);
        assert_eq!(commit.name, "abc1234");
    }

    #[test]
    fn test_node_construction() {
        let test_node = Node::test("src/utils_test.rs", "src/utils_test.rs");
        assert_eq!(test_node.kind, NodeKind::Test);
        assert_eq!(test_node.file_path, "src/utils_test.rs");
    }

    #[test]
    fn knowledge_node_construction() {
        let fact = Node::knowledge("k1", "Database uses PostgreSQL");
        assert_eq!(fact.kind, NodeKind::Knowledge);
        assert!(fact.metadata.unwrap().contains("PostgreSQL"));
    }

    // --- graph round trips ---------------------------------------------------------

    #[test]
    fn graph_commit_and_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let file_id = graph.upsert_node(&Node::file("src/main.rs")).unwrap();
        let commit_id = graph
            .upsert_node(&Node::commit("abc1234", "fix bug"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(file_id, commit_id, EdgeKind::ChangedIn))
            .unwrap();

        let outgoing = graph.edges_from(file_id).unwrap();
        assert_eq!(outgoing.len(), 1);
        assert_eq!(outgoing[0].kind, EdgeKind::ChangedIn);
    }

    #[test]
    fn graph_test_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let code_id = graph.upsert_node(&Node::file("src/utils.rs")).unwrap();
        let test_id = graph
            .upsert_node(&Node::test("src/utils_test.rs", "test_parse"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(code_id, test_id, EdgeKind::TestedBy))
            .unwrap();

        let outgoing = graph.edges_from(code_id).unwrap();
        assert_eq!(outgoing[0].kind, EdgeKind::TestedBy);
    }

    #[test]
    fn graph_knowledge_edge() {
        let graph = CodeGraph::open_in_memory().unwrap();
        let file_id = graph.upsert_node(&Node::file("src/db.rs")).unwrap();
        let fact_id = graph
            .upsert_node(&Node::knowledge("db_type", "Uses PostgreSQL"))
            .unwrap();
        graph
            .upsert_edge(&Edge::new(file_id, fact_id, EdgeKind::MentionedIn))
            .unwrap();

        let outgoing = graph.edges_from(file_id).unwrap();
        assert_eq!(outgoing[0].kind, EdgeKind::MentionedIn);
    }
}