Skip to main content

lean_ctx/core/property_graph/
mod.rs

1//! Property Graph Engine — SQLite-backed code knowledge graph.
2//!
3//! Stores nodes (File, Symbol, Module) and edges (imports, calls, defines,
4//! exports) extracted by `deep_queries` + `import_resolver`.  Provides
5//! efficient traversal queries for impact analysis, architecture discovery,
6//! and graph-driven context loading.
7
8mod edge;
9pub mod file_catalog;
10mod meta;
11mod node;
12mod queries;
13mod schema;
14
15pub use edge::{Edge, EdgeKind};
16pub use file_catalog::FileCatalogEntry;
17pub use meta::{load_meta, meta_path, write_meta, PropertyGraphMetaV1};
18pub use node::{Node, NodeKind};
19pub use queries::{
20    edge_weight, file_connectivity, related_files, DependencyChain, GraphQuery, ImpactResult,
21};
22
23use rusqlite::Connection;
24use std::path::{Path, PathBuf};
25
26/// Resolve the directory for graph.db and graph.meta.json.
27///
28/// Uses `$LEAN_CTX_DATA_DIR/graphs/<project_hash>/` (consistent with
29/// `ProjectIndex::index_dir`).  Falls back to `<project>/.lean-ctx/`
30/// only when the global data directory cannot be resolved.
31pub fn graph_dir(project_root: &str) -> PathBuf {
32    if let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() {
33        let normalized = crate::core::graph_index::normalize_project_root(project_root);
34        let hash = crate::core::project_hash::hash_project_root(&normalized);
35        data_dir.join("graphs").join(hash)
36    } else {
37        Path::new(project_root).join(".lean-ctx")
38    }
39}
40
/// Best-effort migration of `graph.db` and `graph.meta.json` from the legacy
/// per-project `.lean-ctx/` directory into `new_dir`.
///
/// For each of the two files that exists in the legacy directory and is
/// absent from `new_dir`:
///
/// 1. a `rename` is attempted first;
/// 2. if that fails (e.g. the directories live on different filesystems), the
///    file is copied and the legacy copy removed;
/// 3. if the copy fails as well, any partially written destination file is
///    removed so that a later call can retry and a truncated database is
///    never mistaken for a migrated one.
///
/// Files already present in `new_dir` are never overwritten, and nothing
/// happens when `new_dir` is the legacy directory itself.  All I/O errors are
/// intentionally swallowed: migration is an optimisation, and the caller can
/// still proceed with a fresh database.
fn migrate_if_needed(project_root: &str, new_dir: &Path) {
    let old_dir = Path::new(project_root).join(".lean-ctx");
    if old_dir == new_dir {
        return;
    }
    for file in &["graph.db", "graph.meta.json"] {
        let old = old_dir.join(file);
        let new = new_dir.join(file);
        if !old.exists() || new.exists() {
            continue;
        }
        // Fast path: an in-place move.
        if std::fs::rename(&old, &new).is_ok() {
            continue;
        }
        // Slow path: copy + delete.
        match std::fs::copy(&old, &new) {
            Ok(_) => {
                let _ = std::fs::remove_file(&old);
            }
            Err(_) => {
                // `new` did not exist before the copy started, so whatever is
                // there now is an incomplete copy.  Leaving it behind would
                // block every future migration attempt (`new.exists()`) and
                // hand a corrupt file to the next reader.
                let _ = std::fs::remove_file(&new);
            }
        }
    }
}
60
61pub struct CodeGraph {
62    conn: Connection,
63    db_path: PathBuf,
64}
65
66impl CodeGraph {
67    pub fn open(project_root: &str) -> anyhow::Result<Self> {
68        let db_dir = graph_dir(project_root);
69        std::fs::create_dir_all(&db_dir)?;
70        migrate_if_needed(project_root, &db_dir);
71        let db_path = db_dir.join("graph.db");
72        let conn = Connection::open(&db_path)?;
73        conn.busy_timeout(std::time::Duration::from_secs(5))?;
74        schema::initialize(&conn)?;
75        Ok(Self { conn, db_path })
76    }
77
78    pub fn open_in_memory() -> anyhow::Result<Self> {
79        let conn = Connection::open_in_memory()?;
80        schema::initialize(&conn)?;
81        Ok(Self {
82            conn,
83            db_path: PathBuf::from(":memory:"),
84        })
85    }
86
87    pub fn db_path(&self) -> &Path {
88        &self.db_path
89    }
90
91    pub fn connection(&self) -> &Connection {
92        &self.conn
93    }
94
95    pub fn upsert_node(&self, node: &Node) -> anyhow::Result<i64> {
96        node::upsert(&self.conn, node)
97    }
98
99    pub fn upsert_edge(&self, edge: &Edge) -> anyhow::Result<()> {
100        edge::upsert(&self.conn, edge)
101    }
102
103    pub fn get_node_by_path(&self, file_path: &str) -> anyhow::Result<Option<Node>> {
104        node::get_by_path(&self.conn, file_path)
105    }
106
107    pub fn get_node_by_symbol(&self, name: &str, file_path: &str) -> anyhow::Result<Option<Node>> {
108        node::get_by_symbol(&self.conn, name, file_path)
109    }
110
111    pub fn remove_file_nodes(&self, file_path: &str) -> anyhow::Result<()> {
112        node::remove_by_file(&self.conn, file_path)
113    }
114
115    pub fn edges_from(&self, node_id: i64) -> anyhow::Result<Vec<Edge>> {
116        edge::from_node(&self.conn, node_id)
117    }
118
119    pub fn edges_to(&self, node_id: i64) -> anyhow::Result<Vec<Edge>> {
120        edge::to_node(&self.conn, node_id)
121    }
122
123    pub fn dependents(&self, file_path: &str) -> anyhow::Result<Vec<String>> {
124        queries::dependents(&self.conn, file_path)
125    }
126
127    pub fn dependencies(&self, file_path: &str) -> anyhow::Result<Vec<String>> {
128        queries::dependencies(&self.conn, file_path)
129    }
130
131    pub fn impact_analysis(
132        &self,
133        file_path: &str,
134        max_depth: usize,
135    ) -> anyhow::Result<ImpactResult> {
136        queries::impact_analysis(&self.conn, file_path, max_depth)
137    }
138
139    pub fn dependency_chain(
140        &self,
141        from: &str,
142        to: &str,
143    ) -> anyhow::Result<Option<DependencyChain>> {
144        queries::dependency_chain(&self.conn, from, to)
145    }
146
147    pub fn related_files(
148        &self,
149        file_path: &str,
150        limit: usize,
151    ) -> anyhow::Result<Vec<(String, f64)>> {
152        queries::related_files(&self.conn, file_path, limit)
153    }
154
155    pub fn file_connectivity(
156        &self,
157        file_path: &str,
158    ) -> anyhow::Result<std::collections::HashMap<String, (usize, usize)>> {
159        queries::file_connectivity(&self.conn, file_path)
160    }
161
162    pub fn node_count(&self) -> anyhow::Result<usize> {
163        node::count(&self.conn)
164    }
165
166    pub fn edge_count(&self) -> anyhow::Result<usize> {
167        edge::count(&self.conn)
168    }
169
170    pub fn clear(&self) -> anyhow::Result<()> {
171        self.conn
172            .execute_batch("DELETE FROM edges; DELETE FROM nodes; DELETE FROM file_catalog;")?;
173        Ok(())
174    }
175
176    pub fn upsert_file_catalog(&self, entry: &FileCatalogEntry) -> anyhow::Result<()> {
177        file_catalog::upsert(&self.conn, entry)
178    }
179
180    pub fn get_file_catalog(&self, path: &str) -> anyhow::Result<Option<FileCatalogEntry>> {
181        file_catalog::get(&self.conn, path)
182    }
183
184    pub fn file_catalog_count(&self) -> anyhow::Result<usize> {
185        file_catalog::count(&self.conn)
186    }
187
188    pub fn file_catalog_paths(&self) -> anyhow::Result<Vec<String>> {
189        file_catalog::all_paths(&self.conn)
190    }
191
192    pub fn find_symbols(
193        &self,
194        name: &str,
195        file_filter: Option<&str>,
196        kind_filter: Option<&str>,
197    ) -> anyhow::Result<Vec<Node>> {
198        node::find_symbols(&self.conn, name, file_filter, kind_filter)
199    }
200
201    pub fn symbol_count(&self) -> anyhow::Result<usize> {
202        node::symbol_count(&self.conn)
203    }
204
205    pub fn all_edges_flat(&self) -> anyhow::Result<Vec<(String, String, String, f64)>> {
206        node::all_edges_flat(&self.conn)
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;

    /// Environment variable the data-directory tests override.
    const DATA_DIR_VAR: &str = "LEAN_CTX_DATA_DIR";

    /// Fresh in-memory graph for tests that do not touch the filesystem.
    fn test_graph() -> CodeGraph {
        CodeGraph::open_in_memory().unwrap()
    }

    #[test]
    fn create_and_query_nodes() {
        let g = test_graph();

        let id = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        assert!(id > 0);

        let found = g.get_node_by_path("src/main.rs").unwrap();
        assert!(found.is_some());
        assert_eq!(found.unwrap().file_path, "src/main.rs");
    }

    #[test]
    fn create_and_query_edges() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("src/a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("src/b.rs")).unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();

        let from_a = g.edges_from(a).unwrap();
        assert_eq!(from_a.len(), 1);
        assert_eq!(from_a[0].target_id, b);

        let to_b = g.edges_to(b).unwrap();
        assert_eq!(to_b.len(), 1);
        assert_eq!(to_b[0].source_id, a);
    }

    #[test]
    fn dependents_query() {
        let g = test_graph();

        let main = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib = g.upsert_node(&Node::file("src/lib.rs")).unwrap();
        let utils = g.upsert_node(&Node::file("src/utils.rs")).unwrap();

        g.upsert_edge(&Edge::new(main, lib, EdgeKind::Imports))
            .unwrap();
        g.upsert_edge(&Edge::new(utils, lib, EdgeKind::Imports))
            .unwrap();

        let deps = g.dependents("src/lib.rs").unwrap();
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"src/main.rs".to_string()));
        assert!(deps.contains(&"src/utils.rs".to_string()));
    }

    #[test]
    fn dependencies_query() {
        let g = test_graph();

        let main = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        let lib = g.upsert_node(&Node::file("src/lib.rs")).unwrap();
        let config = g.upsert_node(&Node::file("src/config.rs")).unwrap();

        g.upsert_edge(&Edge::new(main, lib, EdgeKind::Imports))
            .unwrap();
        g.upsert_edge(&Edge::new(main, config, EdgeKind::Imports))
            .unwrap();

        let deps = g.dependencies("src/main.rs").unwrap();
        assert_eq!(deps.len(), 2);
    }

    #[test]
    #[allow(clippy::many_single_char_names)] // graph test nodes: a, b, c, d
    fn impact_analysis_depth() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();
        let d = g.upsert_node(&Node::file("d.rs")).unwrap();

        // Chain of importers: d -> c -> b -> a.
        g.upsert_edge(&Edge::new(b, a, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(c, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(d, c, EdgeKind::Imports)).unwrap();

        // Depth 2 reaches b and c but stops short of d.
        let impact = g.impact_analysis("a.rs", 2).unwrap();
        assert!(impact.affected_files.contains(&"b.rs".to_string()));
        assert!(impact.affected_files.contains(&"c.rs".to_string()));
        assert!(!impact.affected_files.contains(&"d.rs".to_string()));

        let deep = g.impact_analysis("a.rs", 10).unwrap();
        assert!(deep.affected_files.contains(&"d.rs".to_string()));
    }

    #[test]
    fn upsert_idempotent() {
        let g = test_graph();

        let id1 = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        let id2 = g.upsert_node(&Node::file("src/main.rs")).unwrap();
        assert_eq!(id1, id2);
        assert_eq!(g.node_count().unwrap(), 1);
    }

    #[test]
    fn remove_file_cascades() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("src/a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("src/b.rs")).unwrap();
        let sym = g
            .upsert_node(&Node::symbol("MyStruct", "src/a.rs", NodeKind::Symbol))
            .unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(sym, b, EdgeKind::Calls)).unwrap();

        g.remove_file_nodes("src/a.rs").unwrap();

        // Both the file node and its symbol are gone, along with their edges.
        assert!(g.get_node_by_path("src/a.rs").unwrap().is_none());
        assert_eq!(g.edge_count().unwrap(), 0);
    }

    #[test]
    fn dependency_chain_found() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(b, c, EdgeKind::Imports)).unwrap();

        let chain = g.dependency_chain("a.rs", "c.rs").unwrap();
        assert!(chain.is_some());
        let chain = chain.unwrap();
        assert_eq!(chain.path, vec!["a.rs", "b.rs", "c.rs"]);
    }

    #[test]
    fn counts() {
        let g = test_graph();
        assert_eq!(g.node_count().unwrap(), 0);
        assert_eq!(g.edge_count().unwrap(), 0);

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();

        assert_eq!(g.node_count().unwrap(), 2);
        assert_eq!(g.edge_count().unwrap(), 1);
    }

    #[test]
    fn multi_edge_dependents() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("src/a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("src/b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("src/c.rs")).unwrap();

        // Dependents are found regardless of the edge kind.
        g.upsert_edge(&Edge::new(b, a, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(c, a, EdgeKind::Calls)).unwrap();

        let deps = g.dependents("src/a.rs").unwrap();
        assert_eq!(deps.len(), 2);
        assert!(deps.contains(&"src/b.rs".to_string()));
        assert!(deps.contains(&"src/c.rs".to_string()));
    }

    #[test]
    fn multi_edge_impact_analysis() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();

        g.upsert_edge(&Edge::new(b, a, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(c, b, EdgeKind::Calls)).unwrap();

        let impact = g.impact_analysis("a.rs", 10).unwrap();
        assert!(impact.affected_files.contains(&"b.rs".to_string()));
        assert!(impact.affected_files.contains(&"c.rs".to_string()));
    }

    #[test]
    fn related_files_scored() {
        let g = test_graph();

        let a = g.upsert_node(&Node::file("a.rs")).unwrap();
        let b = g.upsert_node(&Node::file("b.rs")).unwrap();
        let c = g.upsert_node(&Node::file("c.rs")).unwrap();

        g.upsert_edge(&Edge::new(a, b, EdgeKind::Imports)).unwrap();
        g.upsert_edge(&Edge::new(a, b, EdgeKind::Calls)).unwrap();
        g.upsert_edge(&Edge::new(a, c, EdgeKind::TypeRef)).unwrap();

        let related = g.related_files("a.rs", 10).unwrap();
        assert_eq!(related.len(), 2);
        let b_score = related.iter().find(|(p, _)| p == "b.rs").unwrap().1;
        let c_score = related.iter().find(|(p, _)| p == "c.rs").unwrap().1;
        assert!(
            b_score > c_score,
            "b.rs has imports+calls, should rank higher than c.rs with type_ref"
        );
    }

    /// Serializes tests in this module that mutate the process environment.
    ///
    /// A poisoned lock (a previous test panicked while holding it) is
    /// recovered rather than propagated, so one failure does not cascade.
    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
        LOCK.lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// RAII override of `LEAN_CTX_DATA_DIR`.
    ///
    /// Holds [`env_lock`] for its whole lifetime and puts the variable back
    /// to its previous state on drop.  Because `Drop` also runs during
    /// unwinding, a failing assertion can no longer leak the override into
    /// other tests of the same process.
    struct DataDirOverride {
        /// Value the variable had before the override, if any.
        previous: Option<std::ffi::OsString>,
        /// Released only after `Drop::drop` has restored the variable.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl DataDirOverride {
        /// Takes the environment lock and points `LEAN_CTX_DATA_DIR` at `dir`.
        fn set(dir: &std::path::Path) -> Self {
            let lock = env_lock();
            let previous = std::env::var_os(DATA_DIR_VAR);
            std::env::set_var(DATA_DIR_VAR, dir);
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for DataDirOverride {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var(DATA_DIR_VAR, value),
                None => std::env::remove_var(DATA_DIR_VAR),
            }
        }
    }

    #[test]
    fn graph_dir_uses_data_dir_when_set() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("myproject");
        std::fs::create_dir_all(&project).unwrap();

        let data_dir = tmp.path().join("data");
        std::fs::create_dir_all(&data_dir).unwrap();

        let _env = DataDirOverride::set(&data_dir);

        let dir = graph_dir(project.to_str().unwrap());
        assert!(dir.starts_with(&data_dir));
        assert!(dir.to_string_lossy().contains("graphs"));
    }

    #[test]
    fn graph_dir_returns_consistent_hash_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("hash_project");
        std::fs::create_dir_all(&project).unwrap();

        let data_dir = tmp.path().join("data2");
        std::fs::create_dir_all(&data_dir).unwrap();

        let _env = DataDirOverride::set(&data_dir);

        let dir1 = graph_dir(project.to_str().unwrap());
        let dir2 = graph_dir(project.to_str().unwrap());
        assert_eq!(dir1, dir2, "graph_dir should be deterministic");
        assert!(dir1.to_string_lossy().contains("graphs"));
    }

    #[test]
    fn migration_moves_old_files() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("migtest");
        let old_dir = project.join(".lean-ctx");
        std::fs::create_dir_all(&old_dir).unwrap();
        std::fs::write(old_dir.join("graph.db"), b"old-db-content").unwrap();
        std::fs::write(old_dir.join("graph.meta.json"), b"old-meta").unwrap();

        let new_dir = tmp.path().join("newloc");
        std::fs::create_dir_all(&new_dir).unwrap();

        migrate_if_needed(project.to_str().unwrap(), &new_dir);

        assert!(new_dir.join("graph.db").exists());
        assert!(new_dir.join("graph.meta.json").exists());
        assert!(!old_dir.join("graph.db").exists());
        assert!(!old_dir.join("graph.meta.json").exists());
        assert_eq!(
            std::fs::read_to_string(new_dir.join("graph.db")).unwrap(),
            "old-db-content"
        );
    }

    #[test]
    fn migration_skips_when_new_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("skiptest");
        let old_dir = project.join(".lean-ctx");
        std::fs::create_dir_all(&old_dir).unwrap();
        std::fs::write(old_dir.join("graph.db"), b"old").unwrap();

        let new_dir = tmp.path().join("newloc2");
        std::fs::create_dir_all(&new_dir).unwrap();
        std::fs::write(new_dir.join("graph.db"), b"already-there").unwrap();

        migrate_if_needed(project.to_str().unwrap(), &new_dir);

        // The destination wins and the legacy file is left untouched.
        assert_eq!(
            std::fs::read_to_string(new_dir.join("graph.db")).unwrap(),
            "already-there"
        );
        assert!(old_dir.join("graph.db").exists());
    }

    #[test]
    fn open_with_data_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("opentest");
        std::fs::create_dir_all(&project).unwrap();

        let data_dir = tmp.path().join("xdata");
        std::fs::create_dir_all(&data_dir).unwrap();

        // Declared before `g` so the graph is closed before the environment
        // is restored and the temporary directory is deleted.
        let _env = DataDirOverride::set(&data_dir);

        let g = CodeGraph::open(project.to_str().unwrap()).unwrap();
        assert!(g.db_path().starts_with(&data_dir));
        assert!(g.db_path().to_string_lossy().contains("graph.db"));
    }

    #[test]
    fn meta_path_uses_graph_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let project = tmp.path().join("metatest");
        std::fs::create_dir_all(&project).unwrap();

        let data_dir = tmp.path().join("mdata");
        std::fs::create_dir_all(&data_dir).unwrap();

        let _env = DataDirOverride::set(&data_dir);

        let mp = meta::meta_path(project.to_str().unwrap());
        assert!(mp.starts_with(&data_dir));
        assert!(mp.to_string_lossy().contains("graph.meta.json"));
    }
}