Skip to main content

cartog_db/store/
resolution.rs

1//! 6-tier name-based edge resolution, full and scoped (incremental).
2//!
3//! Part of the [`Database`](super::Database) impl, split out of `lib.rs` for navigability.
4
5use super::*;
6
7impl Database {
8    // ── Edge Resolution ──
9
10    /// Resolve target_name → target_id for all unresolved edges.
11    ///
12    /// Runs two passes so that import edges resolved in pass 1 enable
13    /// import-path resolution (tier 2) for non-import edges in pass 2.
14    ///
15    /// 6-tier priority resolution (per pass):
16    /// 1. Same file — symbol with matching name in the same file
17    /// 2. Import-path — follow resolved imports to find the target in the imported file
18    /// 3. Same directory — symbol in a file in the same directory
19    /// 4. Parent scope preference — when multiple global matches, prefer same parent scope
20    /// 5. Unique project-wide match — exactly one symbol with that name globally
21    /// 6. Class over constructor — when exactly 2 matches and one is a class, prefer class
22    pub fn resolve_edges(&self) -> Result<u32> {
23        let tx = self.conn.unchecked_transaction()?;
24        let total = self.resolve_edges_in_tx()?;
25        tx.commit()?;
26        Ok(total)
27    }
28
29    /// Like [`Self::resolve_edges`] but assumes the caller already holds an
30    /// open transaction.
31    pub fn resolve_edges_in_tx(&self) -> Result<u32> {
32        let mut total_resolved = 0u32;
33        for _pass in 0..2 {
34            let resolved = self.resolve_edges_pass()?;
35            if resolved == 0 {
36                break;
37            }
38            total_resolved += resolved;
39        }
40        Ok(total_resolved)
41    }
42
43    fn resolve_edges_pass(&self) -> Result<u32> {
44        // Skip state=2 (unresolvable) edges: the heuristic can't resolve what
45        // LSP definitively gave up on, and re-walking them every dirty reindex
46        // burns measurable time on large repos. They re-enter via
47        // `reset_unresolvable_for_names` when a matching symbol appears.
48        let mut unresolved_stmt = self.conn.prepare(
49            "SELECT e.id, e.target_name, e.file_path, e.source_id
50             FROM edges e WHERE e.target_id IS NULL AND e.resolution_state = 0",
51        )?;
52
53        let unresolved: Vec<(i64, String, String, String)> = unresolved_stmt
54            .query_map([], |row| {
55                Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
56            })?
57            .collect::<std::result::Result<Vec<_>, _>>()?;
58
59        self.resolve_edge_batch(&unresolved)
60    }
61
62    /// 6-tier heuristic resolution for a batch of unresolved edges.
63    ///
64    /// Caller is responsible for transaction wrapping — see the public
65    /// [`Self::resolve_edges`] / [`Self::resolve_edges_scoped`] helpers.
66    fn resolve_edge_batch(&self, unresolved: &[(i64, String, String, String)]) -> Result<u32> {
67        let mut resolved = 0u32;
68
69        let mut same_file_stmt = self
70            .conn
71            .prepare("SELECT id FROM symbols WHERE name = ?1 AND file_path = ?2 LIMIT 1")?;
72
73        let mut import_resolve_stmt = self.conn.prepare(
74            "SELECT s.id FROM symbols s
75             INNER JOIN edges ie ON ie.kind = 'imports' AND ie.target_name = ?1
76                 AND ie.target_id IS NOT NULL
77             INNER JOIN symbols is2 ON is2.id = ie.source_id AND is2.file_path = ?2
78             INNER JOIN symbols resolved ON resolved.id = ie.target_id
79             WHERE s.name = ?1 AND s.kind != 'import'
80                 AND s.file_path = resolved.file_path
81             LIMIT 1",
82        )?;
83
84        let mut same_dir_stmt = self
85            .conn
86            .prepare("SELECT id FROM symbols WHERE name = ?1 AND file_path LIKE ?2 LIMIT 1")?;
87
88        let mut parent_scope_stmt = self.conn.prepare(
89            "SELECT s.id FROM symbols s
90             INNER JOIN symbols source ON source.id = ?2
91             WHERE s.name = ?1 AND s.parent_id = source.parent_id AND s.id != source.id
92             LIMIT 1",
93        )?;
94
95        let mut anywhere_stmt = self
96            .conn
97            .prepare("SELECT id, kind FROM symbols WHERE name = ?1 AND kind != 'import' LIMIT 3")?;
98
99        // Heuristic resolve must flip state=1 alongside target_id; otherwise
100        // unresolved_edges() (state=0 filter) would still surface the edge to
101        // the next LSP pass, wasting a server roundtrip on already-known answers.
102        let mut update_stmt = self.conn.prepare(
103            "UPDATE edges SET target_id = ?1, resolution_state = 1, resolution_source = ?2
104             WHERE id = ?3",
105        )?;
106
107        for (edge_id, target_name, edge_file, source_id) in unresolved {
108            // `\` is PHP's namespace separator (App\Auth\BaseService).
109            let simple_name = target_name
110                .rsplit(['.', '\\'])
111                .next()
112                .unwrap_or(target_name);
113
114            // 1) Same file
115            let target_id: Option<String> = same_file_stmt
116                .query_row(params![simple_name, edge_file], |row| row.get(0))
117                .optional()?;
118
119            if let Some(tid) = target_id {
120                update_stmt.execute(params![tid, EdgeProvenance::SameFile.as_str(), edge_id])?;
121                resolved += 1;
122                continue;
123            }
124
125            // 2) Import-path resolution
126            let target_id: Option<String> = import_resolve_stmt
127                .query_row(params![simple_name, edge_file], |row| row.get(0))
128                .optional()?;
129
130            if let Some(tid) = target_id {
131                update_stmt.execute(params![tid, EdgeProvenance::ImportPath.as_str(), edge_id])?;
132                resolved += 1;
133                continue;
134            }
135
136            // 3) Same directory
137            let dir = edge_file
138                .rsplit_once('/')
139                .map(|(d, _)| format!("{d}/%"))
140                .unwrap_or_default();
141
142            if !dir.is_empty() {
143                let target_id: Option<String> = same_dir_stmt
144                    .query_row(params![simple_name, dir], |row| row.get(0))
145                    .optional()?;
146
147                if let Some(tid) = target_id {
148                    update_stmt.execute(params![tid, EdgeProvenance::SameDir.as_str(), edge_id])?;
149                    resolved += 1;
150                    continue;
151                }
152            }
153
154            // 4) Parent scope preference
155            let target_id: Option<String> = parent_scope_stmt
156                .query_row(params![simple_name, source_id], |row| row.get(0))
157                .optional()?;
158
159            if let Some(tid) = target_id {
160                update_stmt.execute(params![tid, EdgeProvenance::ParentScope.as_str(), edge_id])?;
161                resolved += 1;
162                continue;
163            }
164
165            // 5+6) Project-wide: unique match, or class-over-constructor disambiguation
166            let mut rows = anywhere_stmt.query(params![simple_name])?;
167            let mut matches: Vec<(String, String)> = Vec::new();
168            while let Some(row) = rows.next()? {
169                matches.push((row.get(0)?, row.get(1)?));
170                if matches.len() == 3 {
171                    break;
172                }
173            }
174            drop(rows);
175
176            // Tier and target are decided together so the provenance label can
177            // never drift from the branch that picked the id.
178            let resolution = match matches.as_slice() {
179                [(id, _)] => Some((id, EdgeProvenance::UniqueGlobal)),
180                [a, b] => disambiguate_two(a, b).map(|id| (id, EdgeProvenance::KindDisambig)),
181                _ => None,
182            };
183
184            if let Some((tid, prov)) = resolution {
185                update_stmt.execute(params![tid, prov.as_str(), edge_id])?;
186                resolved += 1;
187            }
188        }
189
190        Ok(resolved)
191    }
192
193    /// Compute and store in-degree centrality for all symbols.
194    ///
195    /// In-degree = number of resolved incoming edges (calls, imports, inherits, etc.).
196    /// Higher in-degree means the symbol is referenced more across the codebase.
197    /// Resets all in-degree values to 0 first, then batch-updates from the edges table.
198    ///
199    /// tx-safe: two unconditional statements participate in any active outer
200    /// transaction — see [`Self::begin_indexing_tx`].
201    pub fn compute_in_degrees(&self) -> Result<u32> {
202        self.conn.execute("UPDATE symbols SET in_degree = 0", [])?;
203
204        // UPDATE...FROM joins each count to its symbol by PK once. A correlated
205        // subquery here re-scans the counts set per row — O(symbols×edges).
206        let updated = self.conn.execute(
207            "UPDATE symbols SET in_degree = counts.cnt
208             FROM (
209                 SELECT target_id, COUNT(*) AS cnt
210                 FROM edges WHERE target_id IS NOT NULL
211                 GROUP BY target_id
212             ) AS counts
213             WHERE symbols.id = counts.target_id",
214            [],
215        )?;
216
217        Ok(updated as u32)
218    }
219
220    // ── Scoped resolution (incremental indexing) ──
221
222    /// Invalidate resolved edges that point into any of the dirty files.
223    ///
224    /// When a file is re-indexed, its symbols may have been renamed/removed.
225    /// Edges from *unchanged* files that previously resolved to those symbols
226    /// must be cleared so they can be re-resolved against the new symbol set.
227    ///
228    /// tx-safe: single statement — see [`Self::begin_indexing_tx`].
229    pub fn invalidate_edges_targeting(
230        &self,
231        dirty_files: &std::collections::HashSet<String>,
232    ) -> Result<u32> {
233        if dirty_files.is_empty() {
234            return Ok(0);
235        }
236        // After file re-indexing, edges from unchanged files may point to
237        // symbol IDs that no longer exist (removed or renamed symbols).
238        // Set these dangling references to NULL AND reset resolution_state so
239        // the heuristic + LSP passes get another shot. Without the state reset
240        // the edge would stay at state=1 (resolved) but with target_id NULL —
241        // permanently invisible to `unresolved_edges()`.
242        let n = self.conn.execute(
243            "UPDATE edges SET target_id = NULL, resolution_state = 0, resolution_source = NULL
244             WHERE target_id IS NOT NULL
245               AND NOT EXISTS (SELECT 1 FROM symbols WHERE symbols.id = edges.target_id)",
246            [],
247        )?;
248        Ok(n as u32)
249    }
250
251    /// Resolve edges scoped to dirty files only.
252    ///
253    /// Processes: edges originating from dirty files (freshly extracted)
254    /// and edges whose target was just invalidated (target_id set to NULL).
255    /// Uses the same 6-tier heuristic as `resolve_edges`.
256    /// Resolve edges after scoped invalidation.
257    ///
258    /// After `invalidate_edges_targeting` has cleared target_ids for edges
259    /// pointing into dirty files, this re-resolves all currently unresolved edges.
260    /// Fewer edges are unresolved compared to a first-time full resolve.
261    pub fn resolve_edges_scoped(
262        &self,
263        dirty_files: &std::collections::HashSet<String>,
264    ) -> Result<u32> {
265        let tx = self.conn.unchecked_transaction()?;
266        let total = self.resolve_edges_scoped_in_tx(dirty_files)?;
267        tx.commit()?;
268        Ok(total)
269    }
270
271    /// Like [`Self::resolve_edges_scoped`] but assumes the caller already
272    /// holds an open transaction.
273    pub fn resolve_edges_scoped_in_tx(
274        &self,
275        dirty_files: &std::collections::HashSet<String>,
276    ) -> Result<u32> {
277        if dirty_files.is_empty() {
278            return Ok(0);
279        }
280        // After invalidation, the set of unresolved edges is naturally scoped:
281        // only edges from dirty files (freshly extracted) or targeting dirty files
282        // (just invalidated) have target_id = NULL.
283        // Reuse the same 2-pass resolution.
284        self.resolve_edges_in_tx()
285    }
286
287    /// Mark every still-unresolved edge as `resolution_state = 4`
288    /// (heuristic-exhausted, no LSP pass ran).
289    ///
290    /// Call ONLY at the end of an index run that did not (and will not) run the
291    /// LSP pass — i.e. `--no-lsp`, `cartog watch`, or a feature-`lsp`-off build.
292    /// In those runs the 6-tier heuristic is the only resolver, so a leftover
293    /// state=0 edge is permanently unresolvable until the symbol graph changes.
294    /// Without this marker every incremental re-index re-walks the whole
295    /// state=0 backlog (the watch-mode amplification from #109): the
296    /// `resolution_state = 0` scan in `resolve_edges_pass` grows with the
297    /// permanent-failure set, not just the dirty edges.
298    ///
299    /// State 4 is sticky like {2, 3} and re-enters the unresolved set the same
300    /// way: [`Self::reset_unresolvable_for_names`] when a matching symbol is
301    /// added, [`Self::reset_all_unresolvable`] on `--force`, or
302    /// [`Self::reopen_heuristic_exhausted`] before a later LSP-enabled reindex.
303    /// Returns the number of edges marked.
304    ///
305    /// tx-safe: single statement — see [`Self::begin_indexing_tx`].
306    pub fn mark_heuristic_exhausted_in_tx(&self) -> Result<u32> {
307        let n = self.conn.execute(
308            "UPDATE edges SET resolution_state = 4
309             WHERE target_id IS NULL AND resolution_state = 0",
310            [],
311        )?;
312        Ok(n as u32)
313    }
314
315    /// Recompute in-degree centrality after an incremental re-index.
316    ///
317    /// Scoping the reset to dirty files cannot be correct: a symbol in an
318    /// unchanged file that *lost* its incoming edge is unfindable once the
319    /// dirty file's old edges are deleted. Instead, correct every symbol
320    /// whose stored value disagrees with the actual edge count — a full
321    /// ground-truth pass that writes only the rows that changed (unlike
322    /// [`Self::compute_in_degrees`], which rewrites all rows).
323    ///
324    /// tx-safe: every internal statement participates in any active outer
325    /// transaction — see [`Self::begin_indexing_tx`]. Does NOT open one of
326    /// its own, unlike the batched `*_in_tx` helpers; outside an outer
327    /// transaction the zeroing is not atomic with the recompute.
328    pub fn compute_in_degrees_scoped(
329        &self,
330        dirty_files: &std::collections::HashSet<String>,
331    ) -> Result<u32> {
332        if dirty_files.is_empty() {
333            return Ok(0);
334        }
335
336        // Zero symbols that no longer have any incoming edge.
337        let zeroed = self.conn.execute(
338            "UPDATE symbols SET in_degree = 0
339             WHERE in_degree != 0
340               AND id NOT IN (SELECT target_id FROM edges WHERE target_id IS NOT NULL)",
341            [],
342        )?;
343
344        // Fix symbols whose stored value disagrees with the actual count.
345        // UPDATE...FROM avoids the correlated re-scan (see compute_in_degrees);
346        // the `!=` predicate keeps the write set to only changed rows.
347        let updated = self.conn.execute(
348            "UPDATE symbols SET in_degree = counts.cnt
349             FROM (
350                 SELECT target_id, COUNT(*) AS cnt
351                 FROM edges WHERE target_id IS NOT NULL
352                 GROUP BY target_id
353             ) AS counts
354             WHERE symbols.id = counts.target_id
355               AND symbols.in_degree != counts.cnt",
356            [],
357        )?;
358
359        Ok((zeroed + updated) as u32)
360    }
361}