Skip to main content

lean_ctx/core/property_graph/
queries.rs

1//! Graph traversal queries: dependents, dependencies, impact analysis,
2//! dependency chains (BFS-based shortest path).
3//!
4//! All traversal queries support multi-edge traversal: imports, calls,
5//! exports, type_ref, tested_by, and more. Edge kinds are weighted
6//! for impact scoring.
7
8use std::collections::{HashMap, HashSet, VecDeque};
9
10use rusqlite::{params, Connection};
11
12#[derive(Debug, Clone)]
13pub struct GraphQuery;
14
15#[derive(Debug, Clone)]
16pub struct ImpactResult {
17    pub root_file: String,
18    pub affected_files: Vec<String>,
19    pub max_depth_reached: usize,
20    pub edges_traversed: usize,
21}
22
23#[derive(Debug, Clone)]
24pub struct DependencyChain {
25    pub path: Vec<String>,
26    pub depth: usize,
27}
28
29/// Edge kinds considered structural (code connectivity).
30const STRUCTURAL_EDGE_KINDS: &str =
31    "'imports','calls','exports','type_ref','tested_by','module','cochange','sibling'";
32
33/// Weight multiplier per edge kind for impact scoring.
34pub fn edge_weight(kind: &str) -> f64 {
35    match kind {
36        "imports" => 1.0,
37        "calls" => 0.8,
38        "exports" => 0.7,
39        "module" => 0.6,
40        "type_ref" => 0.5,
41        "tested_by" => 0.4,
42        "cochange" => 0.35,
43        "defines" => 0.3,
44        "sibling" => 0.25,
45        "changed_in" => 0.2,
46        _ => 0.1,
47    }
48}
49
50/// Files that depend on `file_path` via structural edges (imports, calls, type_ref, etc.).
51pub(super) fn dependents(conn: &Connection, file_path: &str) -> anyhow::Result<Vec<String>> {
52    let sql = format!(
53        "SELECT DISTINCT n_src.file_path
54         FROM edges e
55         JOIN nodes n_src ON e.source_id = n_src.id
56         JOIN nodes n_tgt ON e.target_id = n_tgt.id
57         WHERE n_tgt.file_path = ?1
58           AND n_src.file_path != ?1
59           AND e.kind IN ({STRUCTURAL_EDGE_KINDS})"
60    );
61    let mut stmt = conn.prepare(&sql)?;
62
63    let mut results: Vec<String> = stmt
64        .query_map(params![file_path], |row| row.get(0))?
65        .filter_map(std::result::Result::ok)
66        .collect();
67
68    results.sort();
69    results.dedup();
70    Ok(results)
71}
72
73/// Files that `file_path` depends on via structural edges.
74pub(super) fn dependencies(conn: &Connection, file_path: &str) -> anyhow::Result<Vec<String>> {
75    let sql = format!(
76        "SELECT DISTINCT n_tgt.file_path
77         FROM edges e
78         JOIN nodes n_src ON e.source_id = n_src.id
79         JOIN nodes n_tgt ON e.target_id = n_tgt.id
80         WHERE n_src.file_path = ?1
81           AND n_tgt.file_path != ?1
82           AND e.kind IN ({STRUCTURAL_EDGE_KINDS})"
83    );
84    let mut stmt = conn.prepare(&sql)?;
85
86    let mut results: Vec<String> = stmt
87        .query_map(params![file_path], |row| row.get(0))?
88        .filter_map(std::result::Result::ok)
89        .collect();
90
91    results.sort();
92    results.dedup();
93    Ok(results)
94}
95
96/// Weighted BFS from `file_path` following reverse structural edges up to `max_depth`.
97/// Edge weights attenuate propagation: calls edges carry less impact than imports.
98/// Nodes only propagate when cumulative weight exceeds the threshold (0.1).
99pub(super) fn impact_analysis(
100    conn: &Connection,
101    file_path: &str,
102    max_depth: usize,
103) -> anyhow::Result<ImpactResult> {
104    let reverse_graph = build_weighted_reverse_graph(conn)?;
105    const PROPAGATION_THRESHOLD: f64 = 0.1;
106
107    let mut visited: HashSet<String> = HashSet::new();
108    let mut queue: VecDeque<(String, usize, f64)> = VecDeque::new();
109    let mut max_depth_reached = 0;
110    let mut edges_traversed = 0;
111
112    visited.insert(file_path.to_string());
113    queue.push_back((file_path.to_string(), 0, 1.0));
114
115    while let Some((current, depth, weight)) = queue.pop_front() {
116        if depth >= max_depth {
117            continue;
118        }
119
120        if let Some(dependents) = reverse_graph.get(&current) {
121            for (dep, ew) in dependents {
122                edges_traversed += 1;
123                let propagated = weight * ew;
124                if propagated < PROPAGATION_THRESHOLD {
125                    continue;
126                }
127                if visited.insert(dep.clone()) {
128                    let new_depth = depth + 1;
129                    if new_depth > max_depth_reached {
130                        max_depth_reached = new_depth;
131                    }
132                    queue.push_back((dep.clone(), new_depth, propagated));
133                }
134            }
135        }
136    }
137
138    visited.remove(file_path);
139
140    Ok(ImpactResult {
141        root_file: file_path.to_string(),
142        affected_files: visited.into_iter().collect(),
143        max_depth_reached,
144        edges_traversed,
145    })
146}
147
148/// BFS shortest path from `from` to `to` following structural edges.
149pub(super) fn dependency_chain(
150    conn: &Connection,
151    from: &str,
152    to: &str,
153) -> anyhow::Result<Option<DependencyChain>> {
154    let forward_graph = build_forward_graph(conn)?;
155
156    let mut visited: HashSet<String> = HashSet::new();
157    let mut parent: HashMap<String, String> = HashMap::new();
158    let mut queue: VecDeque<String> = VecDeque::new();
159
160    visited.insert(from.to_string());
161    queue.push_back(from.to_string());
162
163    while let Some(current) = queue.pop_front() {
164        if current == to {
165            let mut path = vec![to.to_string()];
166            let mut cursor = to.to_string();
167            while let Some(prev) = parent.get(&cursor) {
168                path.push(prev.clone());
169                cursor = prev.clone();
170            }
171            path.reverse();
172            let depth = path.len() - 1;
173            return Ok(Some(DependencyChain { path, depth }));
174        }
175
176        if let Some(deps) = forward_graph.get(&current) {
177            for dep in deps {
178                if visited.insert(dep.clone()) {
179                    parent.insert(dep.clone(), current.clone());
180                    queue.push_back(dep.clone());
181                }
182            }
183        }
184    }
185
186    Ok(None)
187}
188
189/// Related files for a given path: direct neighbors via any structural edge,
190/// sorted by edge weight (strongest relationship first). Returns (path, weight) pairs.
191pub fn related_files(
192    conn: &Connection,
193    file_path: &str,
194    limit: usize,
195) -> anyhow::Result<Vec<(String, f64)>> {
196    let sql = format!(
197        "SELECT n_other.file_path, e.kind
198         FROM edges e
199         JOIN nodes n_self ON (e.source_id = n_self.id OR e.target_id = n_self.id)
200         JOIN nodes n_other ON (
201             (e.source_id = n_other.id AND e.target_id = n_self.id)
202             OR (e.target_id = n_other.id AND e.source_id = n_self.id)
203         )
204         WHERE n_self.file_path = ?1
205           AND n_other.file_path != ?1
206           AND e.kind IN ({STRUCTURAL_EDGE_KINDS})"
207    );
208    let mut stmt = conn.prepare(&sql)?;
209
210    let mut scores: HashMap<String, f64> = HashMap::new();
211    let rows = stmt.query_map(params![file_path], |row| {
212        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
213    })?;
214
215    for row in rows {
216        let (path, kind) = row?;
217        *scores.entry(path).or_default() += edge_weight(&kind);
218    }
219
220    let mut results: Vec<(String, f64)> = scores.into_iter().collect();
221    results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
222    results.truncate(limit);
223    Ok(results)
224}
225
226/// Graph connectivity stats for a file: incoming/outgoing edge counts by kind.
227pub fn file_connectivity(
228    conn: &Connection,
229    file_path: &str,
230) -> anyhow::Result<HashMap<String, (usize, usize)>> {
231    let mut result: HashMap<String, (usize, usize)> = HashMap::new();
232
233    let mut stmt_out = conn.prepare(
234        "SELECT e.kind, COUNT(*)
235         FROM edges e JOIN nodes n ON e.source_id = n.id
236         WHERE n.file_path = ?1
237         GROUP BY e.kind",
238    )?;
239    let rows = stmt_out.query_map(params![file_path], |row| {
240        Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
241    })?;
242    for row in rows {
243        let (kind, count) = row?;
244        result.entry(kind).or_insert((0, 0)).0 = count as usize;
245    }
246
247    let mut stmt_in = conn.prepare(
248        "SELECT e.kind, COUNT(*)
249         FROM edges e JOIN nodes n ON e.target_id = n.id
250         WHERE n.file_path = ?1
251         GROUP BY e.kind",
252    )?;
253    let rows = stmt_in.query_map(params![file_path], |row| {
254        Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?))
255    })?;
256    for row in rows {
257        let (kind, count) = row?;
258        result.entry(kind).or_insert((0, 0)).1 = count as usize;
259    }
260
261    Ok(result)
262}
263
264fn build_weighted_reverse_graph(
265    conn: &Connection,
266) -> anyhow::Result<HashMap<String, Vec<(String, f64)>>> {
267    let sql = format!(
268        "SELECT n_tgt.file_path, n_src.file_path, e.kind
269         FROM edges e
270         JOIN nodes n_src ON e.source_id = n_src.id
271         JOIN nodes n_tgt ON e.target_id = n_tgt.id
272         WHERE e.kind IN ({STRUCTURAL_EDGE_KINDS})
273           AND n_src.file_path != n_tgt.file_path"
274    );
275    let mut stmt = conn.prepare(&sql)?;
276
277    let mut graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
278    let rows = stmt.query_map([], |row| {
279        Ok((
280            row.get::<_, String>(0)?,
281            row.get::<_, String>(1)?,
282            row.get::<_, String>(2)?,
283        ))
284    })?;
285
286    for row in rows {
287        let (target, source, kind) = row?;
288        let w = edge_weight(&kind);
289        let entry = graph
290            .entry(target)
291            .or_default()
292            .entry(source)
293            .or_insert(0.0);
294        if w > *entry {
295            *entry = w;
296        }
297    }
298
299    Ok(graph
300        .into_iter()
301        .map(|(k, v)| (k, v.into_iter().collect()))
302        .collect())
303}
304
305fn build_forward_graph(conn: &Connection) -> anyhow::Result<HashMap<String, Vec<String>>> {
306    let sql = format!(
307        "SELECT DISTINCT n_src.file_path, n_tgt.file_path
308         FROM edges e
309         JOIN nodes n_src ON e.source_id = n_src.id
310         JOIN nodes n_tgt ON e.target_id = n_tgt.id
311         WHERE e.kind IN ({STRUCTURAL_EDGE_KINDS})
312           AND n_src.file_path != n_tgt.file_path"
313    );
314    let mut stmt = conn.prepare(&sql)?;
315
316    let mut graph: HashMap<String, Vec<String>> = HashMap::new();
317    let rows = stmt.query_map([], |row| {
318        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
319    })?;
320
321    for row in rows {
322        let (source, target) = row?;
323        graph.entry(source).or_default().push(target);
324    }
325
326    for deps in graph.values_mut() {
327        deps.sort();
328        deps.dedup();
329    }
330    Ok(graph)
331}