Skip to main content

lean_ctx/core/property_graph/
mod.rs

1//! Property Graph Engine — SQLite-backed code knowledge graph.
2//!
3//! Stores nodes (File, Symbol, Module) and edges (imports, calls, defines,
4//! exports) extracted by `deep_queries` + `import_resolver`.  Provides
5//! efficient traversal queries for impact analysis, architecture discovery,
6//! and graph-driven context loading.
7
8mod edge;
9mod meta;
10mod node;
11mod queries;
12mod schema;
13
14pub use edge::{Edge, EdgeKind};
15pub use meta::{load_meta, meta_path, write_meta, PropertyGraphMetaV1};
16pub use node::{Node, NodeKind};
17pub use queries::{
18    edge_weight, file_connectivity, related_files, DependencyChain, GraphQuery, ImpactResult,
19};
20
21use rusqlite::Connection;
22use std::path::{Path, PathBuf};
23
24/// Resolve the directory for graph.db and graph.meta.json.
25///
26/// Uses `$LEAN_CTX_DATA_DIR/graphs/<project_hash>/` (consistent with
27/// `ProjectIndex::index_dir`).  Falls back to `<project>/.lean-ctx/`
28/// only when the global data directory cannot be resolved.
29pub fn graph_dir(project_root: &str) -> PathBuf {
30    if let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() {
31        let normalized = crate::core::graph_index::normalize_project_root(project_root);
32        let hash = crate::core::project_hash::hash_project_root(&normalized);
33        data_dir.join("graphs").join(hash)
34    } else {
35        Path::new(project_root).join(".lean-ctx")
36    }
37}
38
/// Transparently migrate `graph.db` and `graph.meta.json` from the old
/// per-project `.lean-ctx/` directory to `new_dir` (the `$DATA_DIR/graphs/`
/// location).
///
/// The migration is best-effort and never fails the caller:
///
/// * nothing happens when the old and new directories are the same path;
/// * a file is only moved when it exists in the old directory and is absent
///   from the new one, so an existing destination is never overwritten;
/// * `rename` is tried first; if it fails (typically because the two
///   directories live on different filesystems) the file is copied and the
///   original removed;
/// * if the copy fails as well, any partially written destination file is
///   deleted so that the old file stays authoritative and the migration can
///   be retried on the next call.
fn migrate_if_needed(project_root: &str, new_dir: &Path) {
    let old_dir = Path::new(project_root).join(".lean-ctx");
    if old_dir == new_dir {
        return;
    }
    for file in &["graph.db", "graph.meta.json"] {
        let old = old_dir.join(file);
        let new = new_dir.join(file);
        if !old.exists() || new.exists() {
            continue;
        }
        // Fast path: an atomic move within one filesystem.
        if std::fs::rename(&old, &new).is_ok() {
            continue;
        }
        match std::fs::copy(&old, &new) {
            Ok(_) => {
                // Copy succeeded; dropping the original completes the move.
                let _ = std::fs::remove_file(&old);
            }
            Err(_) => {
                // `new` did not exist before this attempt, so anything at that
                // path now is a truncated copy.  Leaving it behind would make
                // every later call skip the migration and hand a corrupt file
                // to the database layer.
                let _ = std::fs::remove_file(&new);
            }
        }
    }
}
58
59pub struct CodeGraph {
60    conn: Connection,
61    db_path: PathBuf,
62}
63
64impl CodeGraph {
65    pub fn open(project_root: &str) -> anyhow::Result<Self> {
66        let db_dir = graph_dir(project_root);
67        std::fs::create_dir_all(&db_dir)?;
68        migrate_if_needed(project_root, &db_dir);
69        let db_path = db_dir.join("graph.db");
70        let conn = Connection::open(&db_path)?;
71        conn.busy_timeout(std::time::Duration::from_secs(5))?;
72        schema::initialize(&conn)?;
73        Ok(Self { conn, db_path })
74    }
75
76    pub fn open_in_memory() -> anyhow::Result<Self> {
77        let conn = Connection::open_in_memory()?;
78        schema::initialize(&conn)?;
79        Ok(Self {
80            conn,
81            db_path: PathBuf::from(":memory:"),
82        })
83    }
84
85    pub fn db_path(&self) -> &Path {
86        &self.db_path
87    }
88
89    pub fn connection(&self) -> &Connection {
90        &self.conn
91    }
92
93    pub fn upsert_node(&self, node: &Node) -> anyhow::Result<i64> {
94        node::upsert(&self.conn, node)
95    }
96
97    pub fn upsert_edge(&self, edge: &Edge) -> anyhow::Result<()> {
98        edge::upsert(&self.conn, edge)
99    }
100
101    pub fn get_node_by_path(&self, file_path: &str) -> anyhow::Result<Option<Node>> {
102        node::get_by_path(&self.conn, file_path)
103    }
104
105    pub fn get_node_by_symbol(&self, name: &str, file_path: &str) -> anyhow::Result<Option<Node>> {
106        node::get_by_symbol(&self.conn, name, file_path)
107    }
108
109    pub fn remove_file_nodes(&self, file_path: &str) -> anyhow::Result<()> {
110        node::remove_by_file(&self.conn, file_path)
111    }
112
113    pub fn edges_from(&self, node_id: i64) -> anyhow::Result<Vec<Edge>> {
114        edge::from_node(&self.conn, node_id)
115    }
116
117    pub fn edges_to(&self, node_id: i64) -> anyhow::Result<Vec<Edge>> {
118        edge::to_node(&self.conn, node_id)
119    }
120
121    pub fn dependents(&self, file_path: &str) -> anyhow::Result<Vec<String>> {
122        queries::dependents(&self.conn, file_path)
123    }
124
125    pub fn dependencies(&self, file_path: &str) -> anyhow::Result<Vec<String>> {
126        queries::dependencies(&self.conn, file_path)
127    }
128
129    pub fn impact_analysis(
130        &self,
131        file_path: &str,
132        max_depth: usize,
133    ) -> anyhow::Result<ImpactResult> {
134        queries::impact_analysis(&self.conn, file_path, max_depth)
135    }
136
137    pub fn dependency_chain(
138        &self,
139        from: &str,
140        to: &str,
141    ) -> anyhow::Result<Option<DependencyChain>> {
142        queries::dependency_chain(&self.conn, from, to)
143    }
144
145    pub fn related_files(
146        &self,
147        file_path: &str,
148        limit: usize,
149    ) -> anyhow::Result<Vec<(String, f64)>> {
150        queries::related_files(&self.conn, file_path, limit)
151    }
152
153    pub fn file_connectivity(
154        &self,
155        file_path: &str,
156    ) -> anyhow::Result<std::collections::HashMap<String, (usize, usize)>> {
157        queries::file_connectivity(&self.conn, file_path)
158    }
159
160    pub fn node_count(&self) -> anyhow::Result<usize> {
161        node::count(&self.conn)
162    }
163
164    pub fn edge_count(&self) -> anyhow::Result<usize> {
165        edge::count(&self.conn)
166    }
167
168    pub fn clear(&self) -> anyhow::Result<()> {
169        self.conn
170            .execute_batch("DELETE FROM edges; DELETE FROM nodes;")?;
171        Ok(())
172    }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Environment variable that overrides the global data directory.
    const DATA_DIR_VAR: &str = "LEAN_CTX_DATA_DIR";

    /// Fresh in-memory graph for tests that need no filesystem.
    fn test_graph() -> CodeGraph {
        CodeGraph::open_in_memory().unwrap()
    }

    /// Process-wide lock serializing tests that mutate the environment.
    ///
    /// A poisoned lock is recovered on purpose: one failing test must not
    /// cascade into every other environment test.
    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
        LOCK.lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// RAII override of `LEAN_CTX_DATA_DIR`.
    ///
    /// Holds the environment lock for its whole lifetime and restores the
    /// previous value (or removes the variable) on drop, so a panicking
    /// assertion cannot leak the override into other tests.
    struct DataDirOverride {
        previous: Option<std::ffi::OsString>,
        // Declared last and only released after `Drop::drop` has run, so the
        // environment is restored while the lock is still held.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl DataDirOverride {
        fn set(dir: &std::path::Path) -> Self {
            let lock = env_lock();
            let previous = std::env::var_os(DATA_DIR_VAR);
            std::env::set_var(DATA_DIR_VAR, dir);
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for DataDirOverride {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var(DATA_DIR_VAR, value),
                None => std::env::remove_var(DATA_DIR_VAR),
            }
        }
    }

    /// Create `<base>/<project_name>` and `<base>/<data_name>`; return both paths.
    fn scratch_dirs(
        base: &std::path::Path,
        project_name: &str,
        data_name: &str,
    ) -> (std::path::PathBuf, std::path::PathBuf) {
        let project = base.join(project_name);
        std::fs::create_dir_all(&project).unwrap();
        let data_dir = base.join(data_name);
        std::fs::create_dir_all(&data_dir).unwrap();
        (project, data_dir)
    }

    #[test]
    fn create_and_query_nodes() {
        let g = test_graph();

        let id = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        assert!(id > 0);

        let found = g.get_node_by_path("src/main.rs").unwrap();
        assert!(found.is_some());
        assert_eq!(found.unwrap().file_path, "src/main.rs");
    }

    #[test]
    fn create_and_query_edges() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("src/a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("src/b.rs")).unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();

        let from_a = g.edges_from(a).unwrap();
        assert_eq!(from_a.len(), 1);
        assert_eq!(from_a[0].target_id, b);

        let to_b = g.edges_to(b).unwrap();
        assert_eq!(to_b.len(), 1);
        assert_eq!(to_b[0].source_id, a);
    }

    #[test]
    fn dependents_query() {
        let g = test_graph();

        let main = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib = g.upsert_node(&Node::file("src/lib.rs")).unwrap();
        let utils = g.upsert_node(&Node::file("src/utils.rs")).unwrap();

        g.upsert_edge(&Edge::new(main, lib, EdgeKind::Imports))
            .unwrap();
        g.upsert_edge(&Edge::new(utils, lib, EdgeKind::Imports))
            .unwrap();

        let deps = g.dependents("src/lib.rs").unwrap();
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"src/main.rs".to_string()));
        assert!(deps.contains(&"src/utils.rs".to_string()));
    }

    #[test]
    fn dependencies_query() {
        let g = test_graph();

        let main = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib = g.upsert_node(&Node::file("src/lib.rs")).unwrap();
        let config = g.upsert_node(&Node::file("src/config.rs")).unwrap();

        g.upsert_edge(&Edge::new(main, lib, EdgeKind::Imports))
            .unwrap();
        g.upsert_edge(&Edge::new(main, config, EdgeKind::Imports))
            .unwrap();

        let deps = g.dependencies("src/main.rs").unwrap();
        assert_eq!(deps.len(), 2);
    }

    #[test]
    #[allow(clippy::many_single_char_names)] // graph test nodes: a, b, c, d
    fn impact_analysis_depth() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();
        let d = g.upsert_node(&Node::file("d.rs")).unwrap();

        // Chain of importers: d -> c -> b -> a.
        g.upsert_edge(&Edge::new(b, a, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(c, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(d, c, EdgeKind::Imports)).unwrap();

        let impact = g.impact_analysis("a.rs", 2).unwrap();
        assert!(impact.affected_files.contains(&"b.rs".to_string()));
        assert!(impact.affected_files.contains(&"c.rs".to_string()));
        assert!(!impact.affected_files.contains(&"d.rs".to_string()));

        let deep = g.impact_analysis("a.rs", 10).unwrap();
        assert!(deep.affected_files.contains(&"d.rs".to_string()));
    }

    #[test]
    fn upsert_idempotent() {
        let g = test_graph();

        let id1 = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        let id2 = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        assert_eq!(id1, id2);
        assert_eq!(g.node_count().unwrap(), 1);
    }

    #[test]
    fn remove_file_cascades() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("src/a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("src/b.rs")).unwrap();
        let sym = g
            .upsert_node(&Node::symbol("MyStruct", "src/a.rs", NodeKind::Symbol))
            .unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(sym, b, EdgeKind::Calls)).unwrap();

        g.remove_file_nodes("src/a.rs").unwrap();

        assert!(g.get_node_by_path("src/a.rs").unwrap().is_none());
        assert_eq!(g.edge_count().unwrap(), 0);
    }

    #[test]
    fn dependency_chain_found() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(b, c, EdgeKind::Imports)).unwrap();

        let chain = g.dependency_chain("a.rs", "c.rs").unwrap();
        assert!(chain.is_some());
        let chain = chain.unwrap();
        assert_eq!(chain.path, vec!["a.rs", "b.rs", "c.rs"]);
    }

    #[test]
    fn counts() {
        let g = test_graph();
        assert_eq!(g.node_count().unwrap(), 0);
        assert_eq!(g.edge_count().unwrap(), 0);

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();

        assert_eq!(g.node_count().unwrap(), 2);
        assert_eq!(g.edge_count().unwrap(), 1);
    }

    #[test]
    fn multi_edge_dependents() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("src/a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("src/b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("src/c.rs")).unwrap();

        g.upsert_edge(&Edge::new(b, a, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(c, a, EdgeKind::Calls)).unwrap();

        let deps = g.dependents("src/a.rs").unwrap();
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"src/b.rs".to_string()));
        assert!(deps.contains(&"src/c.rs".to_string()));
    }

    #[test]
    fn multi_edge_impact_analysis() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();

        g.upsert_edge(&Edge::new(b, a, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(c, b, EdgeKind::Calls)).unwrap();

        let impact = g.impact_analysis("a.rs", 10).unwrap();
        assert!(impact.affected_files.contains(&"b.rs".to_string()));
        assert!(impact.affected_files.contains(&"c.rs".to_string()));
    }

    #[test]
    fn related_files_scored() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(a, b, EdgeKind::Calls)).unwrap();
        g.upsert_edge(&Edge::new(a, c, EdgeKind::TypeRef)).unwrap();

        let related = g.related_files("a.rs", 10).unwrap();
        assert_eq!(related.len(), 2);
        let b_score = related.iter().find(|(p, _)| p == "b.rs").unwrap().1;
        let c_score = related.iter().find(|(p, _)| p == "c.rs").unwrap().1;
        assert!(
            b_score > c_score,
            "b.rs has imports+calls, should rank higher than c.rs with type_ref"
        );
    }

    #[test]
    fn graph_dir_uses_data_dir_when_set() {
        let tmp = tempfile::tempdir().unwrap();
        let (project, data_dir) = scratch_dirs(tmp.path(), "myproject", "data");
        let _override = DataDirOverride::set(&data_dir);

        let dir = graph_dir(project.to_str().unwrap());
        assert!(dir.starts_with(&data_dir));
        assert!(dir.to_string_lossy().contains("graphs"));
    }

    #[test]
    fn graph_dir_returns_consistent_hash_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let (project, data_dir) = scratch_dirs(tmp.path(), "hash_project", "data2");
        let _override = DataDirOverride::set(&data_dir);

        let dir1 = graph_dir(project.to_str().unwrap());
        let dir2 = graph_dir(project.to_str().unwrap());
        assert_eq!(dir1, dir2, "graph_dir should be deterministic");
        assert!(dir1.to_string_lossy().contains("graphs"));
    }

    #[test]
    fn migration_moves_old_files() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("migtest");
        let old_dir = project.join(".lean-ctx");
        std::fs::create_dir_all(&old_dir).unwrap();
        std::fs::write(old_dir.join("graph.db"), b"old-db-content").unwrap();
        std::fs::write(old_dir.join("graph.meta.json"), b"old-meta").unwrap();

        let new_dir = tmp.path().join("newloc");
        std::fs::create_dir_all(&new_dir).unwrap();

        migrate_if_needed(project.to_str().unwrap(), &new_dir);

        assert!(new_dir.join("graph.db").exists());
        assert!(new_dir.join("graph.meta.json").exists());
        assert!(!old_dir.join("graph.db").exists());
        assert!(!old_dir.join("graph.meta.json").exists());
        assert_eq!(
            std::fs::read_to_string(new_dir.join("graph.db")).unwrap(),
            "old-db-content"
        );
    }

    #[test]
    fn migration_skips_when_new_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("skiptest");
        let old_dir = project.join(".lean-ctx");
        std::fs::create_dir_all(&old_dir).unwrap();
        std::fs::write(old_dir.join("graph.db"), b"old").unwrap();

        let new_dir = tmp.path().join("newloc2");
        std::fs::create_dir_all(&new_dir).unwrap();
        std::fs::write(new_dir.join("graph.db"), b"already-there").unwrap();

        migrate_if_needed(project.to_str().unwrap(), &new_dir);

        assert_eq!(
            std::fs::read_to_string(new_dir.join("graph.db")).unwrap(),
            "already-there"
        );
        assert!(old_dir.join("graph.db").exists());
    }

    #[test]
    fn open_with_data_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let (project, data_dir) = scratch_dirs(tmp.path(), "opentest", "xdata");
        let _override = DataDirOverride::set(&data_dir);

        let g = CodeGraph::open(project.to_str().unwrap()).unwrap();
        assert!(g.db_path().starts_with(&data_dir));
        assert!(g.db_path().to_string_lossy().contains("graph.db"));
    }

    #[test]
    fn meta_path_uses_graph_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let (project, data_dir) = scratch_dirs(tmp.path(), "metatest", "mdata");
        let _override = DataDirOverride::set(&data_dir);

        let mp = meta::meta_path(project.to_str().unwrap());
        assert!(mp.starts_with(&data_dir));
        assert!(mp.to_string_lossy().contains("graph.meta.json"));
    }
}