cartog_db/store/resolution.rs
1//! 6-tier name-based edge resolution, full and scoped (incremental).
2//!
3//! Part of the [`Database`](super::Database) impl, split out of `lib.rs` for navigability.
4
5use super::*;
6
7impl Database {
8 // ── Edge Resolution ──
9
10 /// Resolve target_name → target_id for all unresolved edges.
11 ///
12 /// Runs two passes so that import edges resolved in pass 1 enable
13 /// import-path resolution (tier 2) for non-import edges in pass 2.
14 ///
15 /// 6-tier priority resolution (per pass):
16 /// 1. Same file — symbol with matching name in the same file
17 /// 2. Import-path — follow resolved imports to find the target in the imported file
18 /// 3. Same directory — symbol in a file in the same directory
19 /// 4. Parent scope preference — when multiple global matches, prefer same parent scope
20 /// 5. Unique project-wide match — exactly one symbol with that name globally
21 /// 6. Class over constructor — when exactly 2 matches and one is a class, prefer class
22 pub fn resolve_edges(&self) -> Result<u32> {
23 let tx = self.conn.unchecked_transaction()?;
24 let total = self.resolve_edges_in_tx()?;
25 tx.commit()?;
26 Ok(total)
27 }
28
29 /// Like [`Self::resolve_edges`] but assumes the caller already holds an
30 /// open transaction.
31 pub fn resolve_edges_in_tx(&self) -> Result<u32> {
32 let mut total_resolved = 0u32;
33 for _pass in 0..2 {
34 let resolved = self.resolve_edges_pass()?;
35 if resolved == 0 {
36 break;
37 }
38 total_resolved += resolved;
39 }
40 Ok(total_resolved)
41 }
42
43 fn resolve_edges_pass(&self) -> Result<u32> {
44 // Skip state=2 (unresolvable) edges: the heuristic can't resolve what
45 // LSP definitively gave up on, and re-walking them every dirty reindex
46 // burns measurable time on large repos. They re-enter via
47 // `reset_unresolvable_for_names` when a matching symbol appears.
48 let mut unresolved_stmt = self.conn.prepare(
49 "SELECT e.id, e.target_name, e.file_path, e.source_id
50 FROM edges e WHERE e.target_id IS NULL AND e.resolution_state = 0",
51 )?;
52
53 let unresolved: Vec<(i64, String, String, String)> = unresolved_stmt
54 .query_map([], |row| {
55 Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
56 })?
57 .collect::<std::result::Result<Vec<_>, _>>()?;
58
59 self.resolve_edge_batch(&unresolved)
60 }
61
62 /// 6-tier heuristic resolution for a batch of unresolved edges.
63 ///
64 /// Caller is responsible for transaction wrapping — see the public
65 /// [`Self::resolve_edges`] / [`Self::resolve_edges_scoped`] helpers.
66 fn resolve_edge_batch(&self, unresolved: &[(i64, String, String, String)]) -> Result<u32> {
67 let mut resolved = 0u32;
68
69 let mut same_file_stmt = self
70 .conn
71 .prepare("SELECT id FROM symbols WHERE name = ?1 AND file_path = ?2 LIMIT 1")?;
72
73 let mut import_resolve_stmt = self.conn.prepare(
74 "SELECT s.id FROM symbols s
75 INNER JOIN edges ie ON ie.kind = 'imports' AND ie.target_name = ?1
76 AND ie.target_id IS NOT NULL
77 INNER JOIN symbols is2 ON is2.id = ie.source_id AND is2.file_path = ?2
78 INNER JOIN symbols resolved ON resolved.id = ie.target_id
79 WHERE s.name = ?1 AND s.kind != 'import'
80 AND s.file_path = resolved.file_path
81 LIMIT 1",
82 )?;
83
84 let mut same_dir_stmt = self
85 .conn
86 .prepare("SELECT id FROM symbols WHERE name = ?1 AND file_path LIKE ?2 LIMIT 1")?;
87
88 let mut parent_scope_stmt = self.conn.prepare(
89 "SELECT s.id FROM symbols s
90 INNER JOIN symbols source ON source.id = ?2
91 WHERE s.name = ?1 AND s.parent_id = source.parent_id AND s.id != source.id
92 LIMIT 1",
93 )?;
94
95 let mut anywhere_stmt = self
96 .conn
97 .prepare("SELECT id, kind FROM symbols WHERE name = ?1 AND kind != 'import' LIMIT 3")?;
98
99 // Heuristic resolve must flip state=1 alongside target_id; otherwise
100 // unresolved_edges() (state=0 filter) would still surface the edge to
101 // the next LSP pass, wasting a server roundtrip on already-known answers.
102 let mut update_stmt = self.conn.prepare(
103 "UPDATE edges SET target_id = ?1, resolution_state = 1, resolution_source = ?2
104 WHERE id = ?3",
105 )?;
106
107 for (edge_id, target_name, edge_file, source_id) in unresolved {
108 // `\` is PHP's namespace separator (App\Auth\BaseService).
109 let simple_name = target_name
110 .rsplit(['.', '\\'])
111 .next()
112 .unwrap_or(target_name);
113
114 // 1) Same file
115 let target_id: Option<String> = same_file_stmt
116 .query_row(params![simple_name, edge_file], |row| row.get(0))
117 .optional()?;
118
119 if let Some(tid) = target_id {
120 update_stmt.execute(params![tid, EdgeProvenance::SameFile.as_str(), edge_id])?;
121 resolved += 1;
122 continue;
123 }
124
125 // 2) Import-path resolution
126 let target_id: Option<String> = import_resolve_stmt
127 .query_row(params![simple_name, edge_file], |row| row.get(0))
128 .optional()?;
129
130 if let Some(tid) = target_id {
131 update_stmt.execute(params![tid, EdgeProvenance::ImportPath.as_str(), edge_id])?;
132 resolved += 1;
133 continue;
134 }
135
136 // 3) Same directory
137 let dir = edge_file
138 .rsplit_once('/')
139 .map(|(d, _)| format!("{d}/%"))
140 .unwrap_or_default();
141
142 if !dir.is_empty() {
143 let target_id: Option<String> = same_dir_stmt
144 .query_row(params![simple_name, dir], |row| row.get(0))
145 .optional()?;
146
147 if let Some(tid) = target_id {
148 update_stmt.execute(params![tid, EdgeProvenance::SameDir.as_str(), edge_id])?;
149 resolved += 1;
150 continue;
151 }
152 }
153
154 // 4) Parent scope preference
155 let target_id: Option<String> = parent_scope_stmt
156 .query_row(params![simple_name, source_id], |row| row.get(0))
157 .optional()?;
158
159 if let Some(tid) = target_id {
160 update_stmt.execute(params![tid, EdgeProvenance::ParentScope.as_str(), edge_id])?;
161 resolved += 1;
162 continue;
163 }
164
165 // 5+6) Project-wide: unique match, or class-over-constructor disambiguation
166 let mut rows = anywhere_stmt.query(params![simple_name])?;
167 let mut matches: Vec<(String, String)> = Vec::new();
168 while let Some(row) = rows.next()? {
169 matches.push((row.get(0)?, row.get(1)?));
170 if matches.len() == 3 {
171 break;
172 }
173 }
174 drop(rows);
175
176 // Tier and target are decided together so the provenance label can
177 // never drift from the branch that picked the id.
178 let resolution = match matches.as_slice() {
179 [(id, _)] => Some((id, EdgeProvenance::UniqueGlobal)),
180 [a, b] => disambiguate_two(a, b).map(|id| (id, EdgeProvenance::KindDisambig)),
181 _ => None,
182 };
183
184 if let Some((tid, prov)) = resolution {
185 update_stmt.execute(params![tid, prov.as_str(), edge_id])?;
186 resolved += 1;
187 }
188 }
189
190 Ok(resolved)
191 }
192
193 /// Compute and store in-degree centrality for all symbols.
194 ///
195 /// In-degree = number of resolved incoming edges (calls, imports, inherits, etc.).
196 /// Higher in-degree means the symbol is referenced more across the codebase.
197 /// Resets all in-degree values to 0 first, then batch-updates from the edges table.
198 ///
199 /// tx-safe: two unconditional statements participate in any active outer
200 /// transaction — see [`Self::begin_indexing_tx`].
201 pub fn compute_in_degrees(&self) -> Result<u32> {
202 self.conn.execute("UPDATE symbols SET in_degree = 0", [])?;
203
204 // UPDATE...FROM joins each count to its symbol by PK once. A correlated
205 // subquery here re-scans the counts set per row — O(symbols×edges).
206 let updated = self.conn.execute(
207 "UPDATE symbols SET in_degree = counts.cnt
208 FROM (
209 SELECT target_id, COUNT(*) AS cnt
210 FROM edges WHERE target_id IS NOT NULL
211 GROUP BY target_id
212 ) AS counts
213 WHERE symbols.id = counts.target_id",
214 [],
215 )?;
216
217 Ok(updated as u32)
218 }
219
220 // ── Scoped resolution (incremental indexing) ──
221
222 /// Invalidate resolved edges that point into any of the dirty files.
223 ///
224 /// When a file is re-indexed, its symbols may have been renamed/removed.
225 /// Edges from *unchanged* files that previously resolved to those symbols
226 /// must be cleared so they can be re-resolved against the new symbol set.
227 ///
228 /// tx-safe: single statement — see [`Self::begin_indexing_tx`].
229 pub fn invalidate_edges_targeting(
230 &self,
231 dirty_files: &std::collections::HashSet<String>,
232 ) -> Result<u32> {
233 if dirty_files.is_empty() {
234 return Ok(0);
235 }
236 // After file re-indexing, edges from unchanged files may point to
237 // symbol IDs that no longer exist (removed or renamed symbols).
238 // Set these dangling references to NULL AND reset resolution_state so
239 // the heuristic + LSP passes get another shot. Without the state reset
240 // the edge would stay at state=1 (resolved) but with target_id NULL —
241 // permanently invisible to `unresolved_edges()`.
242 let n = self.conn.execute(
243 "UPDATE edges SET target_id = NULL, resolution_state = 0, resolution_source = NULL
244 WHERE target_id IS NOT NULL
245 AND NOT EXISTS (SELECT 1 FROM symbols WHERE symbols.id = edges.target_id)",
246 [],
247 )?;
248 Ok(n as u32)
249 }
250
251 /// Resolve edges scoped to dirty files only.
252 ///
253 /// Processes: edges originating from dirty files (freshly extracted)
254 /// and edges whose target was just invalidated (target_id set to NULL).
255 /// Uses the same 6-tier heuristic as `resolve_edges`.
256 /// Resolve edges after scoped invalidation.
257 ///
258 /// After `invalidate_edges_targeting` has cleared target_ids for edges
259 /// pointing into dirty files, this re-resolves all currently unresolved edges.
260 /// Fewer edges are unresolved compared to a first-time full resolve.
261 pub fn resolve_edges_scoped(
262 &self,
263 dirty_files: &std::collections::HashSet<String>,
264 ) -> Result<u32> {
265 let tx = self.conn.unchecked_transaction()?;
266 let total = self.resolve_edges_scoped_in_tx(dirty_files)?;
267 tx.commit()?;
268 Ok(total)
269 }
270
271 /// Like [`Self::resolve_edges_scoped`] but assumes the caller already
272 /// holds an open transaction.
273 pub fn resolve_edges_scoped_in_tx(
274 &self,
275 dirty_files: &std::collections::HashSet<String>,
276 ) -> Result<u32> {
277 if dirty_files.is_empty() {
278 return Ok(0);
279 }
280 // After invalidation, the set of unresolved edges is naturally scoped:
281 // only edges from dirty files (freshly extracted) or targeting dirty files
282 // (just invalidated) have target_id = NULL.
283 // Reuse the same 2-pass resolution.
284 self.resolve_edges_in_tx()
285 }
286
287 /// Mark every still-unresolved edge as `resolution_state = 4`
288 /// (heuristic-exhausted, no LSP pass ran).
289 ///
290 /// Call ONLY at the end of an index run that did not (and will not) run the
291 /// LSP pass — i.e. `--no-lsp`, `cartog watch`, or a feature-`lsp`-off build.
292 /// In those runs the 6-tier heuristic is the only resolver, so a leftover
293 /// state=0 edge is permanently unresolvable until the symbol graph changes.
294 /// Without this marker every incremental re-index re-walks the whole
295 /// state=0 backlog (the watch-mode amplification from #109): the
296 /// `resolution_state = 0` scan in `resolve_edges_pass` grows with the
297 /// permanent-failure set, not just the dirty edges.
298 ///
299 /// State 4 is sticky like {2, 3} and re-enters the unresolved set the same
300 /// way: [`Self::reset_unresolvable_for_names`] when a matching symbol is
301 /// added, [`Self::reset_all_unresolvable`] on `--force`, or
302 /// [`Self::reopen_heuristic_exhausted`] before a later LSP-enabled reindex.
303 /// Returns the number of edges marked.
304 ///
305 /// tx-safe: single statement — see [`Self::begin_indexing_tx`].
306 pub fn mark_heuristic_exhausted_in_tx(&self) -> Result<u32> {
307 let n = self.conn.execute(
308 "UPDATE edges SET resolution_state = 4
309 WHERE target_id IS NULL AND resolution_state = 0",
310 [],
311 )?;
312 Ok(n as u32)
313 }
314
315 /// Recompute in-degree centrality after an incremental re-index.
316 ///
317 /// Scoping the reset to dirty files cannot be correct: a symbol in an
318 /// unchanged file that *lost* its incoming edge is unfindable once the
319 /// dirty file's old edges are deleted. Instead, correct every symbol
320 /// whose stored value disagrees with the actual edge count — a full
321 /// ground-truth pass that writes only the rows that changed (unlike
322 /// [`Self::compute_in_degrees`], which rewrites all rows).
323 ///
324 /// tx-safe: every internal statement participates in any active outer
325 /// transaction — see [`Self::begin_indexing_tx`]. Does NOT open one of
326 /// its own, unlike the batched `*_in_tx` helpers; outside an outer
327 /// transaction the zeroing is not atomic with the recompute.
328 pub fn compute_in_degrees_scoped(
329 &self,
330 dirty_files: &std::collections::HashSet<String>,
331 ) -> Result<u32> {
332 if dirty_files.is_empty() {
333 return Ok(0);
334 }
335
336 // Zero symbols that no longer have any incoming edge.
337 let zeroed = self.conn.execute(
338 "UPDATE symbols SET in_degree = 0
339 WHERE in_degree != 0
340 AND id NOT IN (SELECT target_id FROM edges WHERE target_id IS NOT NULL)",
341 [],
342 )?;
343
344 // Fix symbols whose stored value disagrees with the actual count.
345 // UPDATE...FROM avoids the correlated re-scan (see compute_in_degrees);
346 // the `!=` predicate keeps the write set to only changed rows.
347 let updated = self.conn.execute(
348 "UPDATE symbols SET in_degree = counts.cnt
349 FROM (
350 SELECT target_id, COUNT(*) AS cnt
351 FROM edges WHERE target_id IS NOT NULL
352 GROUP BY target_id
353 ) AS counts
354 WHERE symbols.id = counts.target_id
355 AND symbols.in_degree != counts.cnt",
356 [],
357 )?;
358
359 Ok((zeroed + updated) as u32)
360 }
361}