Skip to main content

harn_hostlib/code_index/
mod.rs

1//! Code index host capability.
2//!
3//! Deterministic trigram/word index plus live workspace state (agent
4//! registry, advisory locks, append-only version log, file id assignment,
5//! cached reads). The capability owns one [`SharedIndex`] cell per
6//! instance; cloning the capability shares state with every Harn VM that
7//! has been wired against it.
8//!
9//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
10//!
11//! ### Workspace queries (the original 5)
12//!
13//! | Builtin                          | What it does                                           |
14//! |----------------------------------|--------------------------------------------------------|
15//! | `hostlib_code_index_query`       | Trigram-accelerated literal substring search.          |
16//! | `hostlib_code_index_rebuild`     | Walk a workspace and (re)build the in-memory index.    |
17//! | `hostlib_code_index_stats`       | Count files/trigrams/words + last rebuild timestamp.   |
18//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions).  |
19//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path?     |
20//!
21//! ### Live workspace state (added in #776)
22//!
23//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
24//!   `current_agent_id`, `status`.
25//! - **Locks**: `lock_try`, `lock_release`.
26//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
27//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
28//!   `file_hash`.
29//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
30//!   `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
31//!
32//! ### Typed symbol graph (added in #2434)
33//!
34//! - **`cypher`**: read-only Cypher executor over the typed graph
35//!   ([`SymbolGraph`]) — `MATCH ... WHERE ... RETURN` with typed
36//!   nodes (Function|Type|Module|Import|CallSite|Macro), typed edges
37//!   (CALLS|REFS|IMPORTS|CONTAINS|OVERRIDES, plus `_BY` inverses),
38//!   and variable-length hops up to depth 4.
39//! - **`branch_overlay`**: per-branch CDC overlay that layers a delta
40//!   on top of the base graph; reuses ≥95% of the main index in
41//!   storage/CPU for untouched files. See [`BranchOverlay`].
42//! - **`freshness`**: per-file hash + mtime comparison against the
43//!   indexed snapshot; consumers detect staleness without forcing a
44//!   rebuild.
45//!
46//! ### Cross-file safe rename (added in #2508)
47//!
48//! - **`rename_symbol`**: rewrite a symbol across `file | module |
49//!   workspace` using the typed graph for symbol resolution and
50//!   tree-sitter identifier kinds for safe text spans. Detects
51//!   `new_name` shadowing in any rewritten file and aborts before any
52//!   write. Routes through staged-fs (#1722) when a `session_id` is
53//!   supplied so all touched files succeed or none do.
54//!
55//! ## Concurrency model
56//!
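//! Index-backed ops serialise through a single
//! `Arc<Mutex<Option<IndexState>>>` so the IDE editor, eval, and live
//! agent all see one consistent view. The read-only secondary roots and
//! the `current_agent_id` slot each sit behind their own mutex and never
//! touch the primary cell. The capability is `Send + Sync` so embedders
//! can share it across threads, but the mutexes still serialise actual
//! work.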
61
62mod agents;
63mod builtins;
64mod cypher;
65mod file_table;
66mod graph;
67mod imports;
68mod overlay;
69mod readonly;
70mod rename;
71mod snapshot;
72mod state;
73mod symbol_graph;
74mod trigram;
75mod versions;
76mod walker;
77mod words;
78
79use std::path::Path;
80use std::sync::{Arc, Mutex};
81
82use harn_vm::VmValue;
83
84use crate::error::HostlibError;
85use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
86
87pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
88pub use builtins::SharedIndex;
89pub use cypher::{CypherError, CypherRow, CypherValue};
90pub use file_table::{FileId, IndexedFile, IndexedSymbol};
91pub use graph::DepGraph;
92pub use overlay::{BranchOverlay, OverlayState};
93pub use readonly::ReadonlyRoots;
94pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
95pub use state::{BuildOutcome, IndexState};
96pub use symbol_graph::{Edge, EdgeKind, Node, NodeId, NodeKind, SymbolGraph};
97pub use trigram::TrigramIndex;
98pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
99pub use words::{WordHit, WordIndex};
100
101/// Code-index capability handle.
102///
103/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
104/// the capability shares state. The capability also threads a
105/// `current_agent_id` slot used by the `current_agent_id` host builtin —
106/// embedders update this slot from the request-handling layer so each
107/// host call surfaces the right agent identity to scripts.
108#[derive(Clone, Default)]
109pub struct CodeIndexCapability {
110    index: SharedIndex,
111    /// Additive, read-only secondary roots (issue #2403 follow-up). Live
112    /// beside the primary slot; query/read_range merge them in but no
113    /// mutating builtin ever touches them, so indexing a dependency root
114    /// never clobbers the project index.
115    readonly: ReadonlyRoots,
116    current_agent: Arc<Mutex<Option<AgentId>>>,
117}
118
119impl CodeIndexCapability {
120    /// Create a capability with an empty workspace slot. The first
121    /// `hostlib_code_index_rebuild` call populates it.
122    pub fn new() -> Self {
123        Self {
124            index: Arc::new(Mutex::new(None)),
125            readonly: Arc::new(Mutex::new(Vec::new())),
126            current_agent: Arc::new(Mutex::new(None)),
127        }
128    }
129
130    /// Borrow the underlying shared cell. Useful for tests and embedders
131    /// that want to introspect index state without going through the
132    /// builtins.
133    pub fn shared(&self) -> SharedIndex {
134        self.index.clone()
135    }
136
137    /// Borrow the current-agent slot. Embedders bind this slot before
138    /// dispatching a host call so that `current_agent_id` returns the
139    /// right value to the script.
140    pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
141        self.current_agent.clone()
142    }
143
144    /// Convenience: set the current agent id. Returns the previous value
145    /// (so callers can restore on completion if they bind per-call).
146    pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
147        let mut guard = self.current_agent.lock().expect("current_agent poisoned");
148        std::mem::replace(&mut *guard, id)
149    }
150
151    /// Restore from a previously saved snapshot at the path returned by
152    /// [`CodeIndexSnapshot::path_for`]. After restoring, runs
153    /// [`IndexState::reap_after_recovery`] so stale agent records and
154    /// locks are dropped before the daemon serves traffic.
155    ///
156    /// Returns `true` on a successful restore, `false` if no snapshot
157    /// existed (or the format was unrecognised). Errors propagate I/O
158    /// problems verbatim so callers can decide whether to fall back to
159    /// `rebuild`.
160    pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
161        match CodeIndexSnapshot::load(workspace_root)? {
162            Some(snap) => {
163                let mut state = IndexState::from_snapshot(snap);
164                state.reap_after_recovery(state::now_unix_ms());
165                let mut guard = self.index.lock().expect("code_index mutex poisoned");
166                *guard = Some(state);
167                Ok(true)
168            }
169            None => Ok(false),
170        }
171    }
172
173    /// Persist the current in-memory state to the path returned by
174    /// [`CodeIndexSnapshot::path_for`]. Returns `Ok(false)` when the
175    /// capability is empty (nothing to save).
176    pub fn persist_to_disk(&self) -> std::io::Result<bool> {
177        let snap = {
178            let guard = self.index.lock().expect("code_index mutex poisoned");
179            guard
180                .as_ref()
181                .map(|state| (state.snapshot(), state.root.clone()))
182        };
183        match snap {
184            Some((snap, root)) => {
185                snap.save(&root)?;
186                Ok(true)
187            }
188            None => Ok(false),
189        }
190    }
191}
192
193impl HostlibCapability for CodeIndexCapability {
194    fn module_name(&self) -> &'static str {
195        "code_index"
196    }
197
198    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
199        // Workspace queries (original 5). `query` and `read_range` merge in
200        // the read-only secondary roots (issue #2403 follow-up), so they
201        // capture both the primary and the read-only cells.
202        {
203            let index = self.index.clone();
204            let readonly = self.readonly.clone();
205            let handler: SyncHandler =
206                Arc::new(move |args| builtins::run_query_merged(&index, Some(&readonly), args));
207            registry.register(RegisteredBuiltin {
208                name: builtins::BUILTIN_QUERY,
209                module: "code_index",
210                method: "query",
211                handler,
212            });
213        }
214        register(
215            registry,
216            self.index.clone(),
217            builtins::BUILTIN_REBUILD,
218            "rebuild",
219            builtins::run_rebuild,
220        );
221        register(
222            registry,
223            self.index.clone(),
224            builtins::BUILTIN_STATS,
225            "stats",
226            builtins::run_stats,
227        );
228        register(
229            registry,
230            self.index.clone(),
231            builtins::BUILTIN_IMPORTS_FOR,
232            "imports_for",
233            builtins::run_imports_for,
234        );
235        register(
236            registry,
237            self.index.clone(),
238            builtins::BUILTIN_IMPORTERS_OF,
239            "importers_of",
240            builtins::run_importers_of,
241        );
242
243        // Additive read-only secondary roots (issue #2403 follow-up).
244        // Captures the read-only cell directly — it never touches the
245        // primary index slot.
246        {
247            let readonly = self.readonly.clone();
248            let handler: SyncHandler =
249                Arc::new(move |args| readonly::run_add_readonly_roots(&readonly, args));
250            registry.register(RegisteredBuiltin {
251                name: readonly::BUILTIN_ADD_READONLY_ROOTS,
252                module: "code_index",
253                method: "add_readonly_roots",
254                handler,
255            });
256        }
257
258        // File table accessors.
259        register(
260            registry,
261            self.index.clone(),
262            builtins::BUILTIN_PATH_TO_ID,
263            "path_to_id",
264            builtins::run_path_to_id,
265        );
266        register(
267            registry,
268            self.index.clone(),
269            builtins::BUILTIN_ID_TO_PATH,
270            "id_to_path",
271            builtins::run_id_to_path,
272        );
273        register(
274            registry,
275            self.index.clone(),
276            builtins::BUILTIN_FILE_IDS,
277            "file_ids",
278            builtins::run_file_ids,
279        );
280        register(
281            registry,
282            self.index.clone(),
283            builtins::BUILTIN_FILE_META,
284            "file_meta",
285            builtins::run_file_meta,
286        );
287        register(
288            registry,
289            self.index.clone(),
290            builtins::BUILTIN_FILE_HASH,
291            "file_hash",
292            builtins::run_file_hash,
293        );
294
295        // Cached read paths. `read_range` falls back to the read-only
296        // secondary roots (issue #2403 follow-up) so a symbol discovered in
297        // a dependency root can be read back.
298        {
299            let index = self.index.clone();
300            let readonly = self.readonly.clone();
301            let handler: SyncHandler = Arc::new(move |args| {
302                builtins::run_read_range_merged(&index, Some(&readonly), args)
303            });
304            registry.register(RegisteredBuiltin {
305                name: builtins::BUILTIN_READ_RANGE,
306                module: "code_index",
307                method: "read_range",
308                handler,
309            });
310        }
311        register(
312            registry,
313            self.index.clone(),
314            builtins::BUILTIN_REINDEX_FILE,
315            "reindex_file",
316            builtins::run_reindex_file,
317        );
318        register(
319            registry,
320            self.index.clone(),
321            builtins::BUILTIN_TRIGRAM_QUERY,
322            "trigram_query",
323            builtins::run_trigram_query,
324        );
325        register(
326            registry,
327            self.index.clone(),
328            builtins::BUILTIN_EXTRACT_TRIGRAMS,
329            "extract_trigrams",
330            builtins::run_extract_trigrams,
331        );
332        register(
333            registry,
334            self.index.clone(),
335            builtins::BUILTIN_WORD_GET,
336            "word_get",
337            builtins::run_word_get,
338        );
339        register(
340            registry,
341            self.index.clone(),
342            builtins::BUILTIN_DEPS_GET,
343            "deps_get",
344            builtins::run_deps_get,
345        );
346        register(
347            registry,
348            self.index.clone(),
349            builtins::BUILTIN_OUTLINE_GET,
350            "outline_get",
351            builtins::run_outline_get,
352        );
353
354        // Change log.
355        register(
356            registry,
357            self.index.clone(),
358            builtins::BUILTIN_CURRENT_SEQ,
359            "current_seq",
360            builtins::run_current_seq,
361        );
362        register(
363            registry,
364            self.index.clone(),
365            builtins::BUILTIN_CHANGES_SINCE,
366            "changes_since",
367            builtins::run_changes_since,
368        );
369        register(
370            registry,
371            self.index.clone(),
372            builtins::BUILTIN_VERSION_RECORD,
373            "version_record",
374            builtins::run_version_record,
375        );
376
377        // Agent registry + locks.
378        register(
379            registry,
380            self.index.clone(),
381            builtins::BUILTIN_AGENT_REGISTER,
382            "agent_register",
383            builtins::run_agent_register,
384        );
385        register(
386            registry,
387            self.index.clone(),
388            builtins::BUILTIN_AGENT_HEARTBEAT,
389            "agent_heartbeat",
390            builtins::run_agent_heartbeat,
391        );
392        register(
393            registry,
394            self.index.clone(),
395            builtins::BUILTIN_AGENT_UNREGISTER,
396            "agent_unregister",
397            builtins::run_agent_unregister,
398        );
399        register(
400            registry,
401            self.index.clone(),
402            builtins::BUILTIN_LOCK_TRY,
403            "lock_try",
404            builtins::run_lock_try,
405        );
406        register(
407            registry,
408            self.index.clone(),
409            builtins::BUILTIN_LOCK_RELEASE,
410            "lock_release",
411            builtins::run_lock_release,
412        );
413        register(
414            registry,
415            self.index.clone(),
416            builtins::BUILTIN_STATUS,
417            "status",
418            builtins::run_status,
419        );
420
421        // `current_agent_id` is the only handler that reads from the
422        // capability's per-call `current_agent` slot rather than the
423        // index state, so it gets its own closure.
424        let slot = self.current_agent.clone();
425        let handler: SyncHandler =
426            Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
427        registry.register(RegisteredBuiltin {
428            name: builtins::BUILTIN_CURRENT_AGENT_ID,
429            module: "code_index",
430            method: "current_agent_id",
431            handler,
432        });
433
434        // Typed symbol graph builtins (issue #2434).
435        register(
436            registry,
437            self.index.clone(),
438            builtins::BUILTIN_CYPHER,
439            "cypher",
440            builtins::run_cypher,
441        );
442        register(
443            registry,
444            self.index.clone(),
445            builtins::BUILTIN_BRANCH_OVERLAY,
446            "branch_overlay",
447            builtins::run_branch_overlay,
448        );
449        register(
450            registry,
451            self.index.clone(),
452            builtins::BUILTIN_FRESHNESS,
453            "freshness",
454            builtins::run_freshness,
455        );
456
457        // Cross-file safe rename (issue #2508). Builds on the typed
458        // symbol graph (#2434) and routes writes through staged-fs
459        // (#1722) so all touched files succeed or none do.
460        register(
461            registry,
462            self.index.clone(),
463            rename::BUILTIN,
464            "rename_symbol",
465            rename::run,
466        );
467    }
468}
469
470/// Programmatic entry point for callers that need to compose
471/// `rename_symbol` with another hostlib capability while sharing the
472/// same in-memory code-index state.
473pub(crate) fn run_rename_symbol(
474    index: &SharedIndex,
475    args: &[VmValue],
476) -> Result<VmValue, HostlibError> {
477    rename::run(index, args)
478}
479
480fn register(
481    registry: &mut BuiltinRegistry,
482    index: SharedIndex,
483    name: &'static str,
484    method: &'static str,
485    runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
486) {
487    let captured = index;
488    let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
489    registry.register(RegisteredBuiltin {
490        name,
491        module: "code_index",
492        method,
493        handler,
494    });
495}