1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
//! 6-tier name-based edge resolution, full and scoped (incremental).
//!
//! Part of the [`Database`](super::Database) impl, split out of `lib.rs` for navigability.
use super::*;
impl Database {
// ── Edge Resolution ──
/// Resolve target_name → target_id for all unresolved edges.
///
/// Runs two passes so that import edges resolved in pass 1 enable
/// import-path resolution (tier 2) for non-import edges in pass 2.
///
/// 6-tier priority resolution (per pass):
/// 1. Same file — symbol with matching name in the same file
/// 2. Import-path — follow resolved imports to find the target in the imported file
/// 3. Same directory — symbol in a file in the same directory
/// 4. Parent scope preference — when multiple global matches, prefer same parent scope
/// 5. Unique project-wide match — exactly one symbol with that name globally
/// 6. Class over constructor — when exactly 2 matches and one is a class, prefer class
pub fn resolve_edges(&self) -> Result<u32> {
let tx = self.conn.unchecked_transaction()?;
let total = self.resolve_edges_in_tx()?;
tx.commit()?;
Ok(total)
}
/// Like [`Self::resolve_edges`] but assumes the caller already holds an
/// open transaction.
pub fn resolve_edges_in_tx(&self) -> Result<u32> {
let mut total_resolved = 0u32;
for _pass in 0..2 {
let resolved = self.resolve_edges_pass()?;
if resolved == 0 {
break;
}
total_resolved += resolved;
}
Ok(total_resolved)
}
fn resolve_edges_pass(&self) -> Result<u32> {
// Skip state=2 (unresolvable) edges: the heuristic can't resolve what
// LSP definitively gave up on, and re-walking them every dirty reindex
// burns measurable time on large repos. They re-enter via
// `reset_unresolvable_for_names` when a matching symbol appears.
let mut unresolved_stmt = self.conn.prepare(
"SELECT e.id, e.target_name, e.file_path, e.source_id
FROM edges e WHERE e.target_id IS NULL AND e.resolution_state = 0",
)?;
let unresolved: Vec<(i64, String, String, String)> = unresolved_stmt
.query_map([], |row| {
Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?))
})?
.collect::<std::result::Result<Vec<_>, _>>()?;
self.resolve_edge_batch(&unresolved)
}
/// 6-tier heuristic resolution for a batch of unresolved edges.
///
/// Caller is responsible for transaction wrapping — see the public
/// [`Self::resolve_edges`] / [`Self::resolve_edges_scoped`] helpers.
fn resolve_edge_batch(&self, unresolved: &[(i64, String, String, String)]) -> Result<u32> {
let mut resolved = 0u32;
let mut same_file_stmt = self
.conn
.prepare("SELECT id FROM symbols WHERE name = ?1 AND file_path = ?2 LIMIT 1")?;
let mut import_resolve_stmt = self.conn.prepare(
"SELECT s.id FROM symbols s
INNER JOIN edges ie ON ie.kind = 'imports' AND ie.target_name = ?1
AND ie.target_id IS NOT NULL
INNER JOIN symbols is2 ON is2.id = ie.source_id AND is2.file_path = ?2
INNER JOIN symbols resolved ON resolved.id = ie.target_id
WHERE s.name = ?1 AND s.kind != 'import'
AND s.file_path = resolved.file_path
LIMIT 1",
)?;
let mut same_dir_stmt = self
.conn
.prepare("SELECT id FROM symbols WHERE name = ?1 AND file_path LIKE ?2 LIMIT 1")?;
let mut parent_scope_stmt = self.conn.prepare(
"SELECT s.id FROM symbols s
INNER JOIN symbols source ON source.id = ?2
WHERE s.name = ?1 AND s.parent_id = source.parent_id AND s.id != source.id
LIMIT 1",
)?;
let mut anywhere_stmt = self
.conn
.prepare("SELECT id, kind FROM symbols WHERE name = ?1 AND kind != 'import' LIMIT 3")?;
// Heuristic resolve must flip state=1 alongside target_id; otherwise
// unresolved_edges() (state=0 filter) would still surface the edge to
// the next LSP pass, wasting a server roundtrip on already-known answers.
let mut update_stmt = self.conn.prepare(
"UPDATE edges SET target_id = ?1, resolution_state = 1, resolution_source = ?2
WHERE id = ?3",
)?;
for (edge_id, target_name, edge_file, source_id) in unresolved {
// `\` is PHP's namespace separator (App\Auth\BaseService).
let simple_name = target_name
.rsplit(['.', '\\'])
.next()
.unwrap_or(target_name);
// 1) Same file
let target_id: Option<String> = same_file_stmt
.query_row(params![simple_name, edge_file], |row| row.get(0))
.optional()?;
if let Some(tid) = target_id {
update_stmt.execute(params![tid, EdgeProvenance::SameFile.as_str(), edge_id])?;
resolved += 1;
continue;
}
// 2) Import-path resolution
let target_id: Option<String> = import_resolve_stmt
.query_row(params![simple_name, edge_file], |row| row.get(0))
.optional()?;
if let Some(tid) = target_id {
update_stmt.execute(params![tid, EdgeProvenance::ImportPath.as_str(), edge_id])?;
resolved += 1;
continue;
}
// 3) Same directory
let dir = edge_file
.rsplit_once('/')
.map(|(d, _)| format!("{d}/%"))
.unwrap_or_default();
if !dir.is_empty() {
let target_id: Option<String> = same_dir_stmt
.query_row(params![simple_name, dir], |row| row.get(0))
.optional()?;
if let Some(tid) = target_id {
update_stmt.execute(params![tid, EdgeProvenance::SameDir.as_str(), edge_id])?;
resolved += 1;
continue;
}
}
// 4) Parent scope preference
let target_id: Option<String> = parent_scope_stmt
.query_row(params![simple_name, source_id], |row| row.get(0))
.optional()?;
if let Some(tid) = target_id {
update_stmt.execute(params![tid, EdgeProvenance::ParentScope.as_str(), edge_id])?;
resolved += 1;
continue;
}
// 5+6) Project-wide: unique match, or class-over-constructor disambiguation
let mut rows = anywhere_stmt.query(params![simple_name])?;
let mut matches: Vec<(String, String)> = Vec::new();
while let Some(row) = rows.next()? {
matches.push((row.get(0)?, row.get(1)?));
if matches.len() == 3 {
break;
}
}
drop(rows);
// Tier and target are decided together so the provenance label can
// never drift from the branch that picked the id.
let resolution = match matches.as_slice() {
[(id, _)] => Some((id, EdgeProvenance::UniqueGlobal)),
[a, b] => disambiguate_two(a, b).map(|id| (id, EdgeProvenance::KindDisambig)),
_ => None,
};
if let Some((tid, prov)) = resolution {
update_stmt.execute(params![tid, prov.as_str(), edge_id])?;
resolved += 1;
}
}
Ok(resolved)
}
/// Compute and store in-degree centrality for all symbols.
///
/// In-degree = number of resolved incoming edges (calls, imports, inherits, etc.).
/// Higher in-degree means the symbol is referenced more across the codebase.
/// Resets all in-degree values to 0 first, then batch-updates from the edges table.
///
/// tx-safe: two unconditional statements participate in any active outer
/// transaction — see [`Self::begin_indexing_tx`].
pub fn compute_in_degrees(&self) -> Result<u32> {
self.conn.execute("UPDATE symbols SET in_degree = 0", [])?;
// UPDATE...FROM joins each count to its symbol by PK once. A correlated
// subquery here re-scans the counts set per row — O(symbols×edges).
let updated = self.conn.execute(
"UPDATE symbols SET in_degree = counts.cnt
FROM (
SELECT target_id, COUNT(*) AS cnt
FROM edges WHERE target_id IS NOT NULL
GROUP BY target_id
) AS counts
WHERE symbols.id = counts.target_id",
[],
)?;
Ok(updated as u32)
}
// ── Scoped resolution (incremental indexing) ──
/// Invalidate resolved edges that point into any of the dirty files.
///
/// When a file is re-indexed, its symbols may have been renamed/removed.
/// Edges from *unchanged* files that previously resolved to those symbols
/// must be cleared so they can be re-resolved against the new symbol set.
///
/// tx-safe: single statement — see [`Self::begin_indexing_tx`].
pub fn invalidate_edges_targeting(
&self,
dirty_files: &std::collections::HashSet<String>,
) -> Result<u32> {
if dirty_files.is_empty() {
return Ok(0);
}
// After file re-indexing, edges from unchanged files may point to
// symbol IDs that no longer exist (removed or renamed symbols).
// Set these dangling references to NULL AND reset resolution_state so
// the heuristic + LSP passes get another shot. Without the state reset
// the edge would stay at state=1 (resolved) but with target_id NULL —
// permanently invisible to `unresolved_edges()`.
let n = self.conn.execute(
"UPDATE edges SET target_id = NULL, resolution_state = 0, resolution_source = NULL
WHERE target_id IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM symbols WHERE symbols.id = edges.target_id)",
[],
)?;
Ok(n as u32)
}
/// Resolve edges scoped to dirty files only.
///
/// Processes: edges originating from dirty files (freshly extracted)
/// and edges whose target was just invalidated (target_id set to NULL).
/// Uses the same 6-tier heuristic as `resolve_edges`.
/// Resolve edges after scoped invalidation.
///
/// After `invalidate_edges_targeting` has cleared target_ids for edges
/// pointing into dirty files, this re-resolves all currently unresolved edges.
/// Fewer edges are unresolved compared to a first-time full resolve.
pub fn resolve_edges_scoped(
&self,
dirty_files: &std::collections::HashSet<String>,
) -> Result<u32> {
let tx = self.conn.unchecked_transaction()?;
let total = self.resolve_edges_scoped_in_tx(dirty_files)?;
tx.commit()?;
Ok(total)
}
/// Like [`Self::resolve_edges_scoped`] but assumes the caller already
/// holds an open transaction.
pub fn resolve_edges_scoped_in_tx(
&self,
dirty_files: &std::collections::HashSet<String>,
) -> Result<u32> {
if dirty_files.is_empty() {
return Ok(0);
}
// After invalidation, the set of unresolved edges is naturally scoped:
// only edges from dirty files (freshly extracted) or targeting dirty files
// (just invalidated) have target_id = NULL.
// Reuse the same 2-pass resolution.
self.resolve_edges_in_tx()
}
/// Mark every still-unresolved edge as `resolution_state = 4`
/// (heuristic-exhausted, no LSP pass ran).
///
/// Call ONLY at the end of an index run that did not (and will not) run the
/// LSP pass — i.e. `--no-lsp`, `cartog watch`, or a feature-`lsp`-off build.
/// In those runs the 6-tier heuristic is the only resolver, so a leftover
/// state=0 edge is permanently unresolvable until the symbol graph changes.
/// Without this marker every incremental re-index re-walks the whole
/// state=0 backlog (the watch-mode amplification from #109): the
/// `resolution_state = 0` scan in `resolve_edges_pass` grows with the
/// permanent-failure set, not just the dirty edges.
///
/// State 4 is sticky like {2, 3} and re-enters the unresolved set the same
/// way: [`Self::reset_unresolvable_for_names`] when a matching symbol is
/// added, [`Self::reset_all_unresolvable`] on `--force`, or
/// [`Self::reopen_heuristic_exhausted`] before a later LSP-enabled reindex.
/// Returns the number of edges marked.
///
/// tx-safe: single statement — see [`Self::begin_indexing_tx`].
pub fn mark_heuristic_exhausted_in_tx(&self) -> Result<u32> {
let n = self.conn.execute(
"UPDATE edges SET resolution_state = 4
WHERE target_id IS NULL AND resolution_state = 0",
[],
)?;
Ok(n as u32)
}
/// Recompute in-degree centrality after an incremental re-index.
///
/// Scoping the reset to dirty files cannot be correct: a symbol in an
/// unchanged file that *lost* its incoming edge is unfindable once the
/// dirty file's old edges are deleted. Instead, correct every symbol
/// whose stored value disagrees with the actual edge count — a full
/// ground-truth pass that writes only the rows that changed (unlike
/// [`Self::compute_in_degrees`], which rewrites all rows).
///
/// tx-safe: every internal statement participates in any active outer
/// transaction — see [`Self::begin_indexing_tx`]. Does NOT open one of
/// its own, unlike the batched `*_in_tx` helpers; outside an outer
/// transaction the zeroing is not atomic with the recompute.
pub fn compute_in_degrees_scoped(
&self,
dirty_files: &std::collections::HashSet<String>,
) -> Result<u32> {
if dirty_files.is_empty() {
return Ok(0);
}
// Zero symbols that no longer have any incoming edge.
let zeroed = self.conn.execute(
"UPDATE symbols SET in_degree = 0
WHERE in_degree != 0
AND id NOT IN (SELECT target_id FROM edges WHERE target_id IS NOT NULL)",
[],
)?;
// Fix symbols whose stored value disagrees with the actual count.
// UPDATE...FROM avoids the correlated re-scan (see compute_in_degrees);
// the `!=` predicate keeps the write set to only changed rows.
let updated = self.conn.execute(
"UPDATE symbols SET in_degree = counts.cnt
FROM (
SELECT target_id, COUNT(*) AS cnt
FROM edges WHERE target_id IS NOT NULL
GROUP BY target_id
) AS counts
WHERE symbols.id = counts.target_id
AND symbols.in_degree != counts.cnt",
[],
)?;
Ok((zeroed + updated) as u32)
}
}