harn_hostlib/code_index/mod.rs
1//! Code index host capability.
2//!
3//! Deterministic trigram/word index plus live workspace state (agent
4//! registry, advisory locks, append-only version log, file id assignment,
5//! cached reads). The capability owns one [`SharedIndex`] cell per
6//! instance; cloning the capability shares state with every Harn VM that
7//! has been wired against it.
8//!
9//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
10//!
11//! ### Workspace queries (the original 5)
12//!
13//! | Builtin | What it does |
14//! |----------------------------------|--------------------------------------------------------|
15//! | `hostlib_code_index_query` | Trigram-accelerated literal substring search. |
16//! | `hostlib_code_index_rebuild` | Walk a workspace and (re)build the in-memory index. |
17//! | `hostlib_code_index_stats` | Count files/trigrams/words + last rebuild timestamp. |
18//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions). |
19//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path? |
20//!
21//! ### Live workspace state (added in #776)
22//!
23//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
24//! `current_agent_id`, `status`.
25//! - **Locks**: `lock_try`, `lock_release`.
26//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
27//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
28//! `file_hash`.
29//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
30//! `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
31//!
32//! ### Typed symbol graph (added in #2434)
33//!
34//! - **`cypher`**: read-only Cypher executor over the typed graph
35//! ([`SymbolGraph`]) — `MATCH ... WHERE ... RETURN` with typed
36//! nodes (Function|Type|Module|Import|CallSite|Macro), typed edges
37//! (CALLS|REFS|IMPORTS|CONTAINS|OVERRIDES, plus `_BY` inverses),
38//! and variable-length hops up to depth 4.
39//! - **`branch_overlay`**: per-branch CDC overlay that layers a delta
40//! on top of the base graph; reuses ≥95% of the main index in
41//! storage/CPU for untouched files. See [`BranchOverlay`].
42//! - **`freshness`**: per-file hash + mtime comparison against the
43//! indexed snapshot; consumers detect staleness without forcing a
44//! rebuild.
45//!
46//! ### Cross-file safe rename (added in #2508)
47//!
48//! - **`rename_symbol`**: rewrite a symbol across `file | module |
49//! workspace` using the typed graph for symbol resolution and
50//! tree-sitter identifier kinds for safe text spans. Detects
51//! `new_name` shadowing in any rewritten file and aborts before any
52//! write. Routes through staged-fs (#1722) when a `session_id` is
53//! supplied so all touched files succeed or none do.
54//!
55//! ## Concurrency model
56//!
57//! All ops serialise through a single `Arc<Mutex<Option<IndexState>>>` so
58//! the IDE editor, eval, and live agent all see one consistent view. The
59//! capability is `Send + Sync` so embedders can share it across threads,
60//! but the mutex still serialises actual work.
61
62mod agents;
63mod builtins;
64mod cypher;
65mod file_table;
66mod graph;
67mod imports;
68mod overlay;
69mod readonly;
70mod rename;
71mod snapshot;
72mod state;
73mod symbol_graph;
74mod trigram;
75mod versions;
76mod walker;
77mod words;
78
79use std::path::Path;
80use std::sync::{Arc, Mutex};
81
82use harn_vm::VmValue;
83
84use crate::error::HostlibError;
85use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
86
87pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
88pub use builtins::SharedIndex;
89pub use cypher::{CypherError, CypherRow, CypherValue};
90pub use file_table::{FileId, IndexedFile, IndexedSymbol};
91pub use graph::DepGraph;
92pub use overlay::{BranchOverlay, OverlayState};
93pub use readonly::ReadonlyRoots;
94pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
95pub use state::{BuildOutcome, IndexState};
96pub use symbol_graph::{Edge, EdgeKind, Node, NodeId, NodeKind, SymbolGraph};
97pub use trigram::TrigramIndex;
98pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
99pub use words::{WordHit, WordIndex};
100
101/// Code-index capability handle.
102///
103/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
104/// the capability shares state. The capability also threads a
105/// `current_agent_id` slot used by the `current_agent_id` host builtin —
106/// embedders update this slot from the request-handling layer so each
107/// host call surfaces the right agent identity to scripts.
108#[derive(Clone, Default)]
109pub struct CodeIndexCapability {
110 index: SharedIndex,
111 /// Additive, read-only secondary roots (issue #2403 follow-up). Live
112 /// beside the primary slot; query/read_range merge them in but no
113 /// mutating builtin ever touches them, so indexing a dependency root
114 /// never clobbers the project index.
115 readonly: ReadonlyRoots,
116 current_agent: Arc<Mutex<Option<AgentId>>>,
117}
118
119impl CodeIndexCapability {
120 /// Create a capability with an empty workspace slot. The first
121 /// `hostlib_code_index_rebuild` call populates it.
122 pub fn new() -> Self {
123 Self {
124 index: Arc::new(Mutex::new(None)),
125 readonly: Arc::new(Mutex::new(Vec::new())),
126 current_agent: Arc::new(Mutex::new(None)),
127 }
128 }
129
130 /// Borrow the underlying shared cell. Useful for tests and embedders
131 /// that want to introspect index state without going through the
132 /// builtins.
133 pub fn shared(&self) -> SharedIndex {
134 self.index.clone()
135 }
136
137 /// Borrow the current-agent slot. Embedders bind this slot before
138 /// dispatching a host call so that `current_agent_id` returns the
139 /// right value to the script.
140 pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
141 self.current_agent.clone()
142 }
143
144 /// Convenience: set the current agent id. Returns the previous value
145 /// (so callers can restore on completion if they bind per-call).
146 pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
147 let mut guard = self.current_agent.lock().expect("current_agent poisoned");
148 std::mem::replace(&mut *guard, id)
149 }
150
151 /// Restore from a previously saved snapshot at the path returned by
152 /// [`CodeIndexSnapshot::path_for`]. After restoring, runs
153 /// [`IndexState::reap_after_recovery`] so stale agent records and
154 /// locks are dropped before the daemon serves traffic.
155 ///
156 /// Returns `true` on a successful restore, `false` if no snapshot
157 /// existed (or the format was unrecognised). Errors propagate I/O
158 /// problems verbatim so callers can decide whether to fall back to
159 /// `rebuild`.
160 pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
161 match CodeIndexSnapshot::load(workspace_root)? {
162 Some(snap) => {
163 let mut state = IndexState::from_snapshot(snap);
164 state.reap_after_recovery(state::now_unix_ms());
165 let mut guard = self.index.lock().expect("code_index mutex poisoned");
166 *guard = Some(state);
167 Ok(true)
168 }
169 None => Ok(false),
170 }
171 }
172
173 /// Persist the current in-memory state to the path returned by
174 /// [`CodeIndexSnapshot::path_for`]. Returns `Ok(false)` when the
175 /// capability is empty (nothing to save).
176 pub fn persist_to_disk(&self) -> std::io::Result<bool> {
177 let snap = {
178 let guard = self.index.lock().expect("code_index mutex poisoned");
179 guard
180 .as_ref()
181 .map(|state| (state.snapshot(), state.root.clone()))
182 };
183 match snap {
184 Some((snap, root)) => {
185 snap.save(&root)?;
186 Ok(true)
187 }
188 None => Ok(false),
189 }
190 }
191}
192
193impl HostlibCapability for CodeIndexCapability {
194 fn module_name(&self) -> &'static str {
195 "code_index"
196 }
197
198 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
199 // Workspace queries (original 5). `query` and `read_range` merge in
200 // the read-only secondary roots (issue #2403 follow-up), so they
201 // capture both the primary and the read-only cells.
202 {
203 let index = self.index.clone();
204 let readonly = self.readonly.clone();
205 let handler: SyncHandler =
206 Arc::new(move |args| builtins::run_query_merged(&index, Some(&readonly), args));
207 registry.register(RegisteredBuiltin {
208 name: builtins::BUILTIN_QUERY,
209 module: "code_index",
210 method: "query",
211 handler,
212 });
213 }
214 register(
215 registry,
216 self.index.clone(),
217 builtins::BUILTIN_REBUILD,
218 "rebuild",
219 builtins::run_rebuild,
220 );
221 register(
222 registry,
223 self.index.clone(),
224 builtins::BUILTIN_STATS,
225 "stats",
226 builtins::run_stats,
227 );
228 register(
229 registry,
230 self.index.clone(),
231 builtins::BUILTIN_IMPORTS_FOR,
232 "imports_for",
233 builtins::run_imports_for,
234 );
235 register(
236 registry,
237 self.index.clone(),
238 builtins::BUILTIN_IMPORTERS_OF,
239 "importers_of",
240 builtins::run_importers_of,
241 );
242
243 // Additive read-only secondary roots (issue #2403 follow-up).
244 // Captures the read-only cell directly — it never touches the
245 // primary index slot.
246 {
247 let readonly = self.readonly.clone();
248 let handler: SyncHandler =
249 Arc::new(move |args| readonly::run_add_readonly_roots(&readonly, args));
250 registry.register(RegisteredBuiltin {
251 name: readonly::BUILTIN_ADD_READONLY_ROOTS,
252 module: "code_index",
253 method: "add_readonly_roots",
254 handler,
255 });
256 }
257
258 // File table accessors.
259 register(
260 registry,
261 self.index.clone(),
262 builtins::BUILTIN_PATH_TO_ID,
263 "path_to_id",
264 builtins::run_path_to_id,
265 );
266 register(
267 registry,
268 self.index.clone(),
269 builtins::BUILTIN_ID_TO_PATH,
270 "id_to_path",
271 builtins::run_id_to_path,
272 );
273 register(
274 registry,
275 self.index.clone(),
276 builtins::BUILTIN_FILE_IDS,
277 "file_ids",
278 builtins::run_file_ids,
279 );
280 register(
281 registry,
282 self.index.clone(),
283 builtins::BUILTIN_FILE_META,
284 "file_meta",
285 builtins::run_file_meta,
286 );
287 register(
288 registry,
289 self.index.clone(),
290 builtins::BUILTIN_FILE_HASH,
291 "file_hash",
292 builtins::run_file_hash,
293 );
294
295 // Cached read paths. `read_range` falls back to the read-only
296 // secondary roots (issue #2403 follow-up) so a symbol discovered in
297 // a dependency root can be read back.
298 {
299 let index = self.index.clone();
300 let readonly = self.readonly.clone();
301 let handler: SyncHandler = Arc::new(move |args| {
302 builtins::run_read_range_merged(&index, Some(&readonly), args)
303 });
304 registry.register(RegisteredBuiltin {
305 name: builtins::BUILTIN_READ_RANGE,
306 module: "code_index",
307 method: "read_range",
308 handler,
309 });
310 }
311 register(
312 registry,
313 self.index.clone(),
314 builtins::BUILTIN_REINDEX_FILE,
315 "reindex_file",
316 builtins::run_reindex_file,
317 );
318 register(
319 registry,
320 self.index.clone(),
321 builtins::BUILTIN_TRIGRAM_QUERY,
322 "trigram_query",
323 builtins::run_trigram_query,
324 );
325 register(
326 registry,
327 self.index.clone(),
328 builtins::BUILTIN_EXTRACT_TRIGRAMS,
329 "extract_trigrams",
330 builtins::run_extract_trigrams,
331 );
332 register(
333 registry,
334 self.index.clone(),
335 builtins::BUILTIN_WORD_GET,
336 "word_get",
337 builtins::run_word_get,
338 );
339 register(
340 registry,
341 self.index.clone(),
342 builtins::BUILTIN_DEPS_GET,
343 "deps_get",
344 builtins::run_deps_get,
345 );
346 register(
347 registry,
348 self.index.clone(),
349 builtins::BUILTIN_OUTLINE_GET,
350 "outline_get",
351 builtins::run_outline_get,
352 );
353
354 // Change log.
355 register(
356 registry,
357 self.index.clone(),
358 builtins::BUILTIN_CURRENT_SEQ,
359 "current_seq",
360 builtins::run_current_seq,
361 );
362 register(
363 registry,
364 self.index.clone(),
365 builtins::BUILTIN_CHANGES_SINCE,
366 "changes_since",
367 builtins::run_changes_since,
368 );
369 register(
370 registry,
371 self.index.clone(),
372 builtins::BUILTIN_VERSION_RECORD,
373 "version_record",
374 builtins::run_version_record,
375 );
376
377 // Agent registry + locks.
378 register(
379 registry,
380 self.index.clone(),
381 builtins::BUILTIN_AGENT_REGISTER,
382 "agent_register",
383 builtins::run_agent_register,
384 );
385 register(
386 registry,
387 self.index.clone(),
388 builtins::BUILTIN_AGENT_HEARTBEAT,
389 "agent_heartbeat",
390 builtins::run_agent_heartbeat,
391 );
392 register(
393 registry,
394 self.index.clone(),
395 builtins::BUILTIN_AGENT_UNREGISTER,
396 "agent_unregister",
397 builtins::run_agent_unregister,
398 );
399 register(
400 registry,
401 self.index.clone(),
402 builtins::BUILTIN_LOCK_TRY,
403 "lock_try",
404 builtins::run_lock_try,
405 );
406 register(
407 registry,
408 self.index.clone(),
409 builtins::BUILTIN_LOCK_RELEASE,
410 "lock_release",
411 builtins::run_lock_release,
412 );
413 register(
414 registry,
415 self.index.clone(),
416 builtins::BUILTIN_STATUS,
417 "status",
418 builtins::run_status,
419 );
420
421 // `current_agent_id` is the only handler that reads from the
422 // capability's per-call `current_agent` slot rather than the
423 // index state, so it gets its own closure.
424 let slot = self.current_agent.clone();
425 let handler: SyncHandler =
426 Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
427 registry.register(RegisteredBuiltin {
428 name: builtins::BUILTIN_CURRENT_AGENT_ID,
429 module: "code_index",
430 method: "current_agent_id",
431 handler,
432 });
433
434 // Typed symbol graph builtins (issue #2434).
435 register(
436 registry,
437 self.index.clone(),
438 builtins::BUILTIN_CYPHER,
439 "cypher",
440 builtins::run_cypher,
441 );
442 register(
443 registry,
444 self.index.clone(),
445 builtins::BUILTIN_BRANCH_OVERLAY,
446 "branch_overlay",
447 builtins::run_branch_overlay,
448 );
449 register(
450 registry,
451 self.index.clone(),
452 builtins::BUILTIN_FRESHNESS,
453 "freshness",
454 builtins::run_freshness,
455 );
456
457 // Cross-file safe rename (issue #2508). Builds on the typed
458 // symbol graph (#2434) and routes writes through staged-fs
459 // (#1722) so all touched files succeed or none do.
460 register(
461 registry,
462 self.index.clone(),
463 rename::BUILTIN,
464 "rename_symbol",
465 rename::run,
466 );
467 }
468}
469
470/// Programmatic entry point for callers that need to compose
471/// `rename_symbol` with another hostlib capability while sharing the
472/// same in-memory code-index state.
473pub(crate) fn run_rename_symbol(
474 index: &SharedIndex,
475 args: &[VmValue],
476) -> Result<VmValue, HostlibError> {
477 rename::run(index, args)
478}
479
480fn register(
481 registry: &mut BuiltinRegistry,
482 index: SharedIndex,
483 name: &'static str,
484 method: &'static str,
485 runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
486) {
487 let captured = index;
488 let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
489 registry.register(RegisteredBuiltin {
490 name,
491 module: "code_index",
492 method,
493 handler,
494 });
495}