1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
//! Call graph queries: callers, callees, call graph construction, context.
use std::path::PathBuf;
use sqlx::Row;
use crate::store::helpers::{
clamp_line_number, CallGraph, CallerInfo, CallerWithContext, StoreError,
};
use crate::store::Store;
impl Store {
/// Find all callers of a function (from full call graph)
pub fn get_callers_full(&self, callee_name: &str) -> Result<Vec<CallerInfo>, StoreError> {
let _span = tracing::debug_span!("get_callers_full", function = %callee_name).entered();
tracing::debug!(callee_name, "querying callers from full call graph");
self.rt.block_on(async {
let rows: Vec<(String, String, i64)> = sqlx::query_as(
"SELECT DISTINCT file, caller_name, caller_line
FROM function_calls
WHERE callee_name = ?1
ORDER BY file, caller_line",
)
.bind(callee_name)
.fetch_all(&self.pool)
.await?;
let callers: Vec<CallerInfo> = rows
.into_iter()
.map(|(file, name, line)| CallerInfo {
file: PathBuf::from(file),
name,
line: clamp_line_number(line),
})
.collect();
Ok(callers)
})
}
/// Get all callees of a function (from full call graph)
/// When `file` is provided, scopes to callees of that function in that specific file.
/// When `None`, returns callees across all files (backwards compatible, but ambiguous
/// for common names like `new`, `parse`, `from_str`).
pub fn get_callees_full(
&self,
caller_name: &str,
file: Option<&str>,
) -> Result<Vec<(String, u32)>, StoreError> {
let _span = tracing::debug_span!("get_callees_full", function = %caller_name).entered();
self.rt.block_on(async {
let rows: Vec<(String, i64)> = sqlx::query_as(
"SELECT DISTINCT callee_name, call_line
FROM function_calls
WHERE caller_name = ?1 AND (?2 IS NULL OR file = ?2)
ORDER BY call_line",
)
.bind(caller_name)
.bind(file)
.fetch_all(&self.pool)
.await?;
Ok(rows
.into_iter()
.map(|(name, line)| (name, clamp_line_number(line)))
.collect())
})
}
/// Load the call graph as forward + reverse adjacency lists.
/// Single SQL scan of `function_calls`, capped at 500K edges to prevent OOM
/// on adversarial databases. Typical projects have ~2000 edges.
/// Used by trace (forward BFS), impact (reverse BFS), and test-map (reverse BFS).
/// Cached call graph — populated on first access, returns clone from OnceLock.
/// **No invalidation by design.** The cache lives for the `Store` lifetime and is
/// never cleared. Normal usage is one `Store` per CLI command, so the index cannot
/// change while the cache is live. In long-lived modes (batch, watch), callers must
/// re-open the `Store` to pick up index changes — do not add a `clear()` here.
/// ~15 call sites benefit from this single-scan caching.
pub fn get_call_graph(&self) -> Result<std::sync::Arc<CallGraph>, StoreError> {
if let Some(cached) = self.call_graph_cache.get() {
return Ok(std::sync::Arc::clone(cached));
}
let _span = tracing::info_span!("get_call_graph").entered();
let graph = self.rt.block_on(async {
const MAX_CALL_GRAPH_EDGES: i64 = 500_000;
let rows: Vec<(String, String)> = sqlx::query_as(
"SELECT DISTINCT caller_name, callee_name FROM function_calls LIMIT ?1",
)
.bind(MAX_CALL_GRAPH_EDGES)
.fetch_all(&self.pool)
.await?;
let edge_count = rows.len();
if edge_count as i64 >= MAX_CALL_GRAPH_EDGES {
tracing::warn!(
limit = MAX_CALL_GRAPH_EDGES,
"Call graph truncated at {} edges — analysis may be incomplete",
MAX_CALL_GRAPH_EDGES
);
} else {
tracing::info!(edges = edge_count, "Call graph loaded");
}
let mut forward: std::collections::HashMap<
std::sync::Arc<str>,
Vec<std::sync::Arc<str>>,
> = std::collections::HashMap::new();
let mut reverse: std::collections::HashMap<
std::sync::Arc<str>,
Vec<std::sync::Arc<str>>,
> = std::collections::HashMap::new();
// String interner: each unique name is allocated once as Arc<str>,
// then shared across forward and reverse maps (PERF-30).
let mut interner: std::collections::HashMap<String, std::sync::Arc<str>> =
std::collections::HashMap::new();
let mut intern = |s: String| -> std::sync::Arc<str> {
interner
.entry(s)
.or_insert_with_key(|k| std::sync::Arc::from(k.as_str()))
.clone()
};
for (caller, callee) in rows {
let caller = intern(caller);
let callee = intern(callee);
reverse
.entry(callee.clone())
.or_default()
.push(caller.clone());
forward.entry(caller).or_default().push(callee);
}
Ok::<_, StoreError>(CallGraph { forward, reverse })
})?;
let arc = std::sync::Arc::new(graph);
let _ = self.call_graph_cache.set(std::sync::Arc::clone(&arc));
Ok(arc)
}
/// Find callers with call-site line numbers for impact analysis.
/// Returns the caller function name, file, start line, and the specific line
/// where the call to `callee_name` occurs.
pub fn get_callers_with_context(
&self,
callee_name: &str,
) -> Result<Vec<CallerWithContext>, StoreError> {
let _span =
tracing::debug_span!("get_callers_with_context", function = %callee_name).entered();
self.rt.block_on(async {
let rows: Vec<(String, String, i64, i64)> = sqlx::query_as(
"SELECT file, caller_name, caller_line, call_line
FROM function_calls
WHERE callee_name = ?1
ORDER BY file, call_line",
)
.bind(callee_name)
.fetch_all(&self.pool)
.await?;
Ok(rows
.into_iter()
.map(|(file, name, caller_line, call_line)| CallerWithContext {
file: PathBuf::from(file),
name,
line: clamp_line_number(caller_line),
call_line: clamp_line_number(call_line),
})
.collect())
})
}
/// Batch-fetch callers with context for multiple callee names.
/// Returns `callee_name -> Vec<CallerWithContext>` using a single
/// `WHERE callee_name IN (...)` query per batch of 500 names.
/// Avoids N+1 `get_callers_with_context` calls in diff impact analysis.
pub fn get_callers_with_context_batch(
&self,
callee_names: &[&str],
) -> Result<std::collections::HashMap<String, Vec<CallerWithContext>>, StoreError> {
let _span =
tracing::debug_span!("get_callers_with_context_batch", count = callee_names.len())
.entered();
if callee_names.is_empty() {
return Ok(std::collections::HashMap::new());
}
self.rt.block_on(async {
let mut result: std::collections::HashMap<String, Vec<CallerWithContext>> =
std::collections::HashMap::new();
const BATCH_SIZE: usize = 200; // 200 names * 5 cols = 1000 binds, but we only bind names
for batch in callee_names.chunks(BATCH_SIZE) {
let placeholders = super::super::helpers::make_placeholders(batch.len());
let sql = format!(
"SELECT callee_name, file, caller_name, caller_line, call_line
FROM function_calls
WHERE callee_name IN ({})
ORDER BY callee_name, file, call_line",
placeholders
);
let mut q = sqlx::query(&sql);
for name in batch {
q = q.bind(name);
}
let rows: Vec<_> = q.fetch_all(&self.pool).await?;
for row in rows {
let callee: String = row.get(0);
let caller = CallerWithContext {
file: PathBuf::from(row.get::<String, _>(1)),
name: row.get(2),
line: clamp_line_number(row.get::<i64, _>(3)),
call_line: clamp_line_number(row.get::<i64, _>(4)),
};
result.entry(callee).or_default().push(caller);
}
}
Ok(result)
})
}
/// Batch-fetch callers (full call graph) for multiple callee names.
/// Returns `callee_name -> Vec<CallerInfo>` using a single
/// `WHERE callee_name IN (...)` query per batch of 500 names.
/// Avoids N+1 `get_callers_full` calls in the context command.
pub fn get_callers_full_batch(
&self,
callee_names: &[&str],
) -> Result<std::collections::HashMap<String, Vec<CallerInfo>>, StoreError> {
let _span =
tracing::debug_span!("get_callers_full_batch", count = callee_names.len()).entered();
if callee_names.is_empty() {
return Ok(std::collections::HashMap::new());
}
self.rt.block_on(async {
let mut result: std::collections::HashMap<String, Vec<CallerInfo>> =
std::collections::HashMap::new();
const BATCH_SIZE: usize = 250; // 250 * 4 cols = 1000, but only binding names
for batch in callee_names.chunks(BATCH_SIZE) {
let placeholders = super::super::helpers::make_placeholders(batch.len());
let sql = format!(
"SELECT DISTINCT callee_name, file, caller_name, caller_line
FROM function_calls
WHERE callee_name IN ({})
ORDER BY callee_name, file, caller_line",
placeholders
);
let mut q = sqlx::query(&sql);
for name in batch {
q = q.bind(name);
}
let rows: Vec<_> = q.fetch_all(&self.pool).await?;
for row in rows {
let callee: String = row.get(0);
let caller = CallerInfo {
file: PathBuf::from(row.get::<String, _>(1)),
name: row.get(2),
line: clamp_line_number(row.get::<i64, _>(3)),
};
result.entry(callee).or_default().push(caller);
}
}
Ok(result)
})
}
/// Batch-fetch callees (full call graph) for multiple caller names.
/// Returns `caller_name -> Vec<(callee_name, call_line)>` using a single
/// `WHERE caller_name IN (...)` query per batch of 500 names.
/// Avoids N+1 `get_callees_full` calls in the context command.
/// Unlike [`get_callees_full`], does not support file scoping — returns
/// callees across all files. This is acceptable for the context command
/// which later filters by origin.
pub fn get_callees_full_batch(
&self,
caller_names: &[&str],
) -> Result<std::collections::HashMap<String, Vec<(String, u32)>>, StoreError> {
let _span =
tracing::debug_span!("get_callees_full_batch", count = caller_names.len()).entered();
if caller_names.is_empty() {
return Ok(std::collections::HashMap::new());
}
self.rt.block_on(async {
let mut result: std::collections::HashMap<String, Vec<(String, u32)>> =
std::collections::HashMap::new();
const BATCH_SIZE: usize = 250;
for batch in caller_names.chunks(BATCH_SIZE) {
let placeholders = super::super::helpers::make_placeholders(batch.len());
let sql = format!(
"SELECT DISTINCT caller_name, callee_name, call_line
FROM function_calls
WHERE caller_name IN ({})
ORDER BY caller_name, call_line",
placeholders
);
let mut q = sqlx::query(&sql);
for name in batch {
q = q.bind(name);
}
let rows: Vec<_> = q.fetch_all(&self.pool).await?;
for row in rows {
let caller: String = row.get(0);
let callee_name: String = row.get(1);
let call_line = clamp_line_number(row.get::<i64, _>(2));
result
.entry(caller)
.or_default()
.push((callee_name, call_line));
}
}
Ok(result)
})
}
}