harn_hostlib/code_index/mod.rs
1//! Code index host capability.
2//!
3//! Deterministic trigram/word index plus live workspace state (agent
4//! registry, advisory locks, append-only version log, file id assignment,
5//! cached reads). The capability owns one [`SharedIndex`] cell per
6//! instance; cloning the capability shares state with every Harn VM that
7//! has been wired against it.
8//!
9//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
10//!
11//! ### Workspace queries (the original 5)
12//!
13//! | Builtin | What it does |
14//! |----------------------------------|--------------------------------------------------------|
15//! | `hostlib_code_index_query` | Trigram-accelerated literal substring search. |
16//! | `hostlib_code_index_rebuild` | Walk a workspace and (re)build the in-memory index. |
17//! | `hostlib_code_index_stats` | Count files/trigrams/words + last rebuild timestamp. |
18//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions). |
19//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path? |
20//!
21//! ### Live workspace state (added in #776)
22//!
23//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
24//! `current_agent_id`, `status`.
25//! - **Locks**: `lock_try`, `lock_release`.
26//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
27//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
28//! `file_hash`.
29//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
30//! `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
31//!
32//! ### Typed symbol graph (added in #2434)
33//!
34//! - **`cypher`**: read-only Cypher executor over the typed graph
35//! ([`SymbolGraph`]) — `MATCH ... WHERE ... RETURN` with typed
36//! nodes (Function|Type|Module|Import|CallSite|Macro), typed edges
37//! (CALLS|REFS|IMPORTS|CONTAINS|OVERRIDES, plus `_BY` inverses),
38//! and variable-length hops up to depth 4.
39//! - **`branch_overlay`**: per-branch CDC overlay that layers a delta
40//! on top of the base graph; reuses ≥95% of the main index in
41//! storage/CPU for untouched files. See [`BranchOverlay`].
42//! - **`freshness`**: per-file hash + mtime comparison against the
43//! indexed snapshot; consumers detect staleness without forcing a
44//! rebuild.
45//!
46//! ### Cross-file safe rename (added in #2508)
47//!
48//! - **`rename_symbol`**: rewrite a symbol across `file | module |
49//! workspace` using the typed graph for symbol resolution and
50//! tree-sitter identifier kinds for safe text spans. Detects
51//! `new_name` shadowing in any rewritten file and aborts before any
52//! write. Routes through staged-fs (#1722) when a `session_id` is
53//! supplied so all touched files succeed or none do.
54//!
55//! ## Concurrency model
56//!
57//! All ops serialise through a single `Arc<Mutex<Option<IndexState>>>` so
58//! the IDE editor, eval, and live agent all see one consistent view. The
59//! capability is `Send + Sync` so embedders can share it across threads,
60//! but the mutex still serialises actual work.
61
62mod agents;
63mod builtins;
64mod cypher;
65mod file_table;
66mod graph;
67mod imports;
68mod overlay;
69mod rename;
70mod snapshot;
71mod state;
72mod symbol_graph;
73mod trigram;
74mod versions;
75mod walker;
76mod words;
77
78use std::path::Path;
79use std::sync::{Arc, Mutex};
80
81use harn_vm::VmValue;
82
83use crate::error::HostlibError;
84use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
85
86pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
87pub use builtins::SharedIndex;
88pub use cypher::{CypherError, CypherRow, CypherValue};
89pub use file_table::{FileId, IndexedFile, IndexedSymbol};
90pub use graph::DepGraph;
91pub use overlay::{BranchOverlay, OverlayState};
92pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
93pub use state::{BuildOutcome, IndexState};
94pub use symbol_graph::{Edge, EdgeKind, Node, NodeId, NodeKind, SymbolGraph};
95pub use trigram::TrigramIndex;
96pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
97pub use words::{WordHit, WordIndex};
98
99/// Code-index capability handle.
100///
101/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
102/// the capability shares state. The capability also threads a
103/// `current_agent_id` slot used by the `current_agent_id` host builtin —
104/// embedders update this slot from the request-handling layer so each
105/// host call surfaces the right agent identity to scripts.
106#[derive(Clone, Default)]
107pub struct CodeIndexCapability {
108 index: SharedIndex,
109 current_agent: Arc<Mutex<Option<AgentId>>>,
110}
111
112impl CodeIndexCapability {
113 /// Create a capability with an empty workspace slot. The first
114 /// `hostlib_code_index_rebuild` call populates it.
115 pub fn new() -> Self {
116 Self {
117 index: Arc::new(Mutex::new(None)),
118 current_agent: Arc::new(Mutex::new(None)),
119 }
120 }
121
122 /// Borrow the underlying shared cell. Useful for tests and embedders
123 /// that want to introspect index state without going through the
124 /// builtins.
125 pub fn shared(&self) -> SharedIndex {
126 self.index.clone()
127 }
128
129 /// Borrow the current-agent slot. Embedders bind this slot before
130 /// dispatching a host call so that `current_agent_id` returns the
131 /// right value to the script.
132 pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
133 self.current_agent.clone()
134 }
135
136 /// Convenience: set the current agent id. Returns the previous value
137 /// (so callers can restore on completion if they bind per-call).
138 pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
139 let mut guard = self.current_agent.lock().expect("current_agent poisoned");
140 std::mem::replace(&mut *guard, id)
141 }
142
143 /// Restore from a previously saved snapshot at the path returned by
144 /// [`CodeIndexSnapshot::path_for`]. After restoring, runs
145 /// [`IndexState::reap_after_recovery`] so stale agent records and
146 /// locks are dropped before the daemon serves traffic.
147 ///
148 /// Returns `true` on a successful restore, `false` if no snapshot
149 /// existed (or the format was unrecognised). Errors propagate I/O
150 /// problems verbatim so callers can decide whether to fall back to
151 /// `rebuild`.
152 pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
153 match CodeIndexSnapshot::load(workspace_root)? {
154 Some(snap) => {
155 let mut state = IndexState::from_snapshot(snap);
156 state.reap_after_recovery(state::now_unix_ms());
157 let mut guard = self.index.lock().expect("code_index mutex poisoned");
158 *guard = Some(state);
159 Ok(true)
160 }
161 None => Ok(false),
162 }
163 }
164
165 /// Persist the current in-memory state to the path returned by
166 /// [`CodeIndexSnapshot::path_for`]. Returns `Ok(false)` when the
167 /// capability is empty (nothing to save).
168 pub fn persist_to_disk(&self) -> std::io::Result<bool> {
169 let snap = {
170 let guard = self.index.lock().expect("code_index mutex poisoned");
171 guard
172 .as_ref()
173 .map(|state| (state.snapshot(), state.root.clone()))
174 };
175 match snap {
176 Some((snap, root)) => {
177 snap.save(&root)?;
178 Ok(true)
179 }
180 None => Ok(false),
181 }
182 }
183}
184
185impl HostlibCapability for CodeIndexCapability {
186 fn module_name(&self) -> &'static str {
187 "code_index"
188 }
189
190 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
191 // Workspace queries (original 5).
192 register(
193 registry,
194 self.index.clone(),
195 builtins::BUILTIN_QUERY,
196 "query",
197 builtins::run_query,
198 );
199 register(
200 registry,
201 self.index.clone(),
202 builtins::BUILTIN_REBUILD,
203 "rebuild",
204 builtins::run_rebuild,
205 );
206 register(
207 registry,
208 self.index.clone(),
209 builtins::BUILTIN_STATS,
210 "stats",
211 builtins::run_stats,
212 );
213 register(
214 registry,
215 self.index.clone(),
216 builtins::BUILTIN_IMPORTS_FOR,
217 "imports_for",
218 builtins::run_imports_for,
219 );
220 register(
221 registry,
222 self.index.clone(),
223 builtins::BUILTIN_IMPORTERS_OF,
224 "importers_of",
225 builtins::run_importers_of,
226 );
227
228 // File table accessors.
229 register(
230 registry,
231 self.index.clone(),
232 builtins::BUILTIN_PATH_TO_ID,
233 "path_to_id",
234 builtins::run_path_to_id,
235 );
236 register(
237 registry,
238 self.index.clone(),
239 builtins::BUILTIN_ID_TO_PATH,
240 "id_to_path",
241 builtins::run_id_to_path,
242 );
243 register(
244 registry,
245 self.index.clone(),
246 builtins::BUILTIN_FILE_IDS,
247 "file_ids",
248 builtins::run_file_ids,
249 );
250 register(
251 registry,
252 self.index.clone(),
253 builtins::BUILTIN_FILE_META,
254 "file_meta",
255 builtins::run_file_meta,
256 );
257 register(
258 registry,
259 self.index.clone(),
260 builtins::BUILTIN_FILE_HASH,
261 "file_hash",
262 builtins::run_file_hash,
263 );
264
265 // Cached read paths.
266 register(
267 registry,
268 self.index.clone(),
269 builtins::BUILTIN_READ_RANGE,
270 "read_range",
271 builtins::run_read_range,
272 );
273 register(
274 registry,
275 self.index.clone(),
276 builtins::BUILTIN_REINDEX_FILE,
277 "reindex_file",
278 builtins::run_reindex_file,
279 );
280 register(
281 registry,
282 self.index.clone(),
283 builtins::BUILTIN_TRIGRAM_QUERY,
284 "trigram_query",
285 builtins::run_trigram_query,
286 );
287 register(
288 registry,
289 self.index.clone(),
290 builtins::BUILTIN_EXTRACT_TRIGRAMS,
291 "extract_trigrams",
292 builtins::run_extract_trigrams,
293 );
294 register(
295 registry,
296 self.index.clone(),
297 builtins::BUILTIN_WORD_GET,
298 "word_get",
299 builtins::run_word_get,
300 );
301 register(
302 registry,
303 self.index.clone(),
304 builtins::BUILTIN_DEPS_GET,
305 "deps_get",
306 builtins::run_deps_get,
307 );
308 register(
309 registry,
310 self.index.clone(),
311 builtins::BUILTIN_OUTLINE_GET,
312 "outline_get",
313 builtins::run_outline_get,
314 );
315
316 // Change log.
317 register(
318 registry,
319 self.index.clone(),
320 builtins::BUILTIN_CURRENT_SEQ,
321 "current_seq",
322 builtins::run_current_seq,
323 );
324 register(
325 registry,
326 self.index.clone(),
327 builtins::BUILTIN_CHANGES_SINCE,
328 "changes_since",
329 builtins::run_changes_since,
330 );
331 register(
332 registry,
333 self.index.clone(),
334 builtins::BUILTIN_VERSION_RECORD,
335 "version_record",
336 builtins::run_version_record,
337 );
338
339 // Agent registry + locks.
340 register(
341 registry,
342 self.index.clone(),
343 builtins::BUILTIN_AGENT_REGISTER,
344 "agent_register",
345 builtins::run_agent_register,
346 );
347 register(
348 registry,
349 self.index.clone(),
350 builtins::BUILTIN_AGENT_HEARTBEAT,
351 "agent_heartbeat",
352 builtins::run_agent_heartbeat,
353 );
354 register(
355 registry,
356 self.index.clone(),
357 builtins::BUILTIN_AGENT_UNREGISTER,
358 "agent_unregister",
359 builtins::run_agent_unregister,
360 );
361 register(
362 registry,
363 self.index.clone(),
364 builtins::BUILTIN_LOCK_TRY,
365 "lock_try",
366 builtins::run_lock_try,
367 );
368 register(
369 registry,
370 self.index.clone(),
371 builtins::BUILTIN_LOCK_RELEASE,
372 "lock_release",
373 builtins::run_lock_release,
374 );
375 register(
376 registry,
377 self.index.clone(),
378 builtins::BUILTIN_STATUS,
379 "status",
380 builtins::run_status,
381 );
382
383 // `current_agent_id` is the only handler that reads from the
384 // capability's per-call `current_agent` slot rather than the
385 // index state, so it gets its own closure.
386 let slot = self.current_agent.clone();
387 let handler: SyncHandler =
388 Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
389 registry.register(RegisteredBuiltin {
390 name: builtins::BUILTIN_CURRENT_AGENT_ID,
391 module: "code_index",
392 method: "current_agent_id",
393 handler,
394 });
395
396 // Typed symbol graph builtins (issue #2434).
397 register(
398 registry,
399 self.index.clone(),
400 builtins::BUILTIN_CYPHER,
401 "cypher",
402 builtins::run_cypher,
403 );
404 register(
405 registry,
406 self.index.clone(),
407 builtins::BUILTIN_BRANCH_OVERLAY,
408 "branch_overlay",
409 builtins::run_branch_overlay,
410 );
411 register(
412 registry,
413 self.index.clone(),
414 builtins::BUILTIN_FRESHNESS,
415 "freshness",
416 builtins::run_freshness,
417 );
418
419 // Cross-file safe rename (issue #2508). Builds on the typed
420 // symbol graph (#2434) and routes writes through staged-fs
421 // (#1722) so all touched files succeed or none do.
422 register(
423 registry,
424 self.index.clone(),
425 rename::BUILTIN,
426 "rename_symbol",
427 rename::run,
428 );
429 }
430}
431
432fn register(
433 registry: &mut BuiltinRegistry,
434 index: SharedIndex,
435 name: &'static str,
436 method: &'static str,
437 runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
438) {
439 let captured = index;
440 let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
441 registry.register(RegisteredBuiltin {
442 name,
443 module: "code_index",
444 method,
445 handler,
446 });
447}