Skip to main content

harn_hostlib/code_index/
mod.rs

1//! Code index host capability.
2//!
3//! Deterministic trigram/word index plus live workspace state (agent
4//! registry, advisory locks, append-only version log, file id assignment,
5//! cached reads). The capability owns one [`SharedIndex`] cell per
6//! instance; cloning the capability shares state with every Harn VM that
7//! has been wired against it.
8//!
9//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
10//!
11//! ### Workspace queries (the original 5)
12//!
13//! | Builtin                          | What it does                                           |
14//! |----------------------------------|--------------------------------------------------------|
15//! | `hostlib_code_index_query`       | Trigram-accelerated literal substring search.          |
16//! | `hostlib_code_index_rebuild`     | Walk a workspace and (re)build the in-memory index.    |
17//! | `hostlib_code_index_stats`       | Count files/trigrams/words + last rebuild timestamp.   |
18//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions).  |
19//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path?     |
20//!
21//! ### Live workspace state (added in #776)
22//!
23//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
24//!   `current_agent_id`, `status`.
25//! - **Locks**: `lock_try`, `lock_release`.
26//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
27//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
28//!   `file_hash`.
29//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
30//!   `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
31//!
32//! ### Typed symbol graph (added in #2434)
33//!
34//! - **`cypher`**: read-only Cypher executor over the typed graph
35//!   ([`SymbolGraph`]) — `MATCH ... WHERE ... RETURN` with typed
36//!   nodes (Function|Type|Module|Import|CallSite|Macro), typed edges
37//!   (CALLS|REFS|IMPORTS|CONTAINS|OVERRIDES, plus `_BY` inverses),
38//!   and variable-length hops up to depth 4.
39//! - **`branch_overlay`**: per-branch CDC overlay that layers a delta
40//!   on top of the base graph; reuses ≥95% of the main index in
41//!   storage/CPU for untouched files. See [`BranchOverlay`].
42//! - **`freshness`**: per-file hash + mtime comparison against the
43//!   indexed snapshot; consumers detect staleness without forcing a
44//!   rebuild.
45//!
46//! ### Cross-file safe rename (added in #2508)
47//!
48//! - **`rename_symbol`**: rewrite a symbol across `file | module |
49//!   workspace` using the typed graph for symbol resolution and
50//!   tree-sitter identifier kinds for safe text spans. Detects
51//!   `new_name` shadowing in any rewritten file and aborts before any
52//!   write. Routes through staged-fs (#1722) when a `session_id` is
53//!   supplied so all touched files succeed or none do.
54//!
55//! ## Concurrency model
56//!
57//! All ops serialise through a single `Arc<Mutex<Option<IndexState>>>` so
58//! the IDE editor, eval, and live agent all see one consistent view. The
59//! capability is `Send + Sync` so embedders can share it across threads,
60//! but the mutex still serialises actual work.
61
62mod agents;
63mod builtins;
64mod cypher;
65mod file_table;
66mod graph;
67mod imports;
68mod overlay;
69mod rename;
70mod snapshot;
71mod state;
72mod symbol_graph;
73mod trigram;
74mod versions;
75mod walker;
76mod words;
77
78use std::path::Path;
79use std::sync::{Arc, Mutex};
80
81use harn_vm::VmValue;
82
83use crate::error::HostlibError;
84use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
85
86pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
87pub use builtins::SharedIndex;
88pub use cypher::{CypherError, CypherRow, CypherValue};
89pub use file_table::{FileId, IndexedFile, IndexedSymbol};
90pub use graph::DepGraph;
91pub use overlay::{BranchOverlay, OverlayState};
92pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
93pub use state::{BuildOutcome, IndexState};
94pub use symbol_graph::{Edge, EdgeKind, Node, NodeId, NodeKind, SymbolGraph};
95pub use trigram::TrigramIndex;
96pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
97pub use words::{WordHit, WordIndex};
98
99/// Code-index capability handle.
100///
101/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
102/// the capability shares state. The capability also threads a
103/// `current_agent_id` slot used by the `current_agent_id` host builtin —
104/// embedders update this slot from the request-handling layer so each
105/// host call surfaces the right agent identity to scripts.
106#[derive(Clone, Default)]
107pub struct CodeIndexCapability {
108    index: SharedIndex,
109    current_agent: Arc<Mutex<Option<AgentId>>>,
110}
111
112impl CodeIndexCapability {
113    /// Create a capability with an empty workspace slot. The first
114    /// `hostlib_code_index_rebuild` call populates it.
115    pub fn new() -> Self {
116        Self {
117            index: Arc::new(Mutex::new(None)),
118            current_agent: Arc::new(Mutex::new(None)),
119        }
120    }
121
122    /// Borrow the underlying shared cell. Useful for tests and embedders
123    /// that want to introspect index state without going through the
124    /// builtins.
125    pub fn shared(&self) -> SharedIndex {
126        self.index.clone()
127    }
128
129    /// Borrow the current-agent slot. Embedders bind this slot before
130    /// dispatching a host call so that `current_agent_id` returns the
131    /// right value to the script.
132    pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
133        self.current_agent.clone()
134    }
135
136    /// Convenience: set the current agent id. Returns the previous value
137    /// (so callers can restore on completion if they bind per-call).
138    pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
139        let mut guard = self.current_agent.lock().expect("current_agent poisoned");
140        std::mem::replace(&mut *guard, id)
141    }
142
143    /// Restore from a previously saved snapshot at the path returned by
144    /// [`CodeIndexSnapshot::path_for`]. After restoring, runs
145    /// [`IndexState::reap_after_recovery`] so stale agent records and
146    /// locks are dropped before the daemon serves traffic.
147    ///
148    /// Returns `true` on a successful restore, `false` if no snapshot
149    /// existed (or the format was unrecognised). Errors propagate I/O
150    /// problems verbatim so callers can decide whether to fall back to
151    /// `rebuild`.
152    pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
153        match CodeIndexSnapshot::load(workspace_root)? {
154            Some(snap) => {
155                let mut state = IndexState::from_snapshot(snap);
156                state.reap_after_recovery(state::now_unix_ms());
157                let mut guard = self.index.lock().expect("code_index mutex poisoned");
158                *guard = Some(state);
159                Ok(true)
160            }
161            None => Ok(false),
162        }
163    }
164
165    /// Persist the current in-memory state to the path returned by
166    /// [`CodeIndexSnapshot::path_for`]. Returns `Ok(false)` when the
167    /// capability is empty (nothing to save).
168    pub fn persist_to_disk(&self) -> std::io::Result<bool> {
169        let snap = {
170            let guard = self.index.lock().expect("code_index mutex poisoned");
171            guard
172                .as_ref()
173                .map(|state| (state.snapshot(), state.root.clone()))
174        };
175        match snap {
176            Some((snap, root)) => {
177                snap.save(&root)?;
178                Ok(true)
179            }
180            None => Ok(false),
181        }
182    }
183}
184
185impl HostlibCapability for CodeIndexCapability {
186    fn module_name(&self) -> &'static str {
187        "code_index"
188    }
189
190    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
191        // Workspace queries (original 5).
192        register(
193            registry,
194            self.index.clone(),
195            builtins::BUILTIN_QUERY,
196            "query",
197            builtins::run_query,
198        );
199        register(
200            registry,
201            self.index.clone(),
202            builtins::BUILTIN_REBUILD,
203            "rebuild",
204            builtins::run_rebuild,
205        );
206        register(
207            registry,
208            self.index.clone(),
209            builtins::BUILTIN_STATS,
210            "stats",
211            builtins::run_stats,
212        );
213        register(
214            registry,
215            self.index.clone(),
216            builtins::BUILTIN_IMPORTS_FOR,
217            "imports_for",
218            builtins::run_imports_for,
219        );
220        register(
221            registry,
222            self.index.clone(),
223            builtins::BUILTIN_IMPORTERS_OF,
224            "importers_of",
225            builtins::run_importers_of,
226        );
227
228        // File table accessors.
229        register(
230            registry,
231            self.index.clone(),
232            builtins::BUILTIN_PATH_TO_ID,
233            "path_to_id",
234            builtins::run_path_to_id,
235        );
236        register(
237            registry,
238            self.index.clone(),
239            builtins::BUILTIN_ID_TO_PATH,
240            "id_to_path",
241            builtins::run_id_to_path,
242        );
243        register(
244            registry,
245            self.index.clone(),
246            builtins::BUILTIN_FILE_IDS,
247            "file_ids",
248            builtins::run_file_ids,
249        );
250        register(
251            registry,
252            self.index.clone(),
253            builtins::BUILTIN_FILE_META,
254            "file_meta",
255            builtins::run_file_meta,
256        );
257        register(
258            registry,
259            self.index.clone(),
260            builtins::BUILTIN_FILE_HASH,
261            "file_hash",
262            builtins::run_file_hash,
263        );
264
265        // Cached read paths.
266        register(
267            registry,
268            self.index.clone(),
269            builtins::BUILTIN_READ_RANGE,
270            "read_range",
271            builtins::run_read_range,
272        );
273        register(
274            registry,
275            self.index.clone(),
276            builtins::BUILTIN_REINDEX_FILE,
277            "reindex_file",
278            builtins::run_reindex_file,
279        );
280        register(
281            registry,
282            self.index.clone(),
283            builtins::BUILTIN_TRIGRAM_QUERY,
284            "trigram_query",
285            builtins::run_trigram_query,
286        );
287        register(
288            registry,
289            self.index.clone(),
290            builtins::BUILTIN_EXTRACT_TRIGRAMS,
291            "extract_trigrams",
292            builtins::run_extract_trigrams,
293        );
294        register(
295            registry,
296            self.index.clone(),
297            builtins::BUILTIN_WORD_GET,
298            "word_get",
299            builtins::run_word_get,
300        );
301        register(
302            registry,
303            self.index.clone(),
304            builtins::BUILTIN_DEPS_GET,
305            "deps_get",
306            builtins::run_deps_get,
307        );
308        register(
309            registry,
310            self.index.clone(),
311            builtins::BUILTIN_OUTLINE_GET,
312            "outline_get",
313            builtins::run_outline_get,
314        );
315
316        // Change log.
317        register(
318            registry,
319            self.index.clone(),
320            builtins::BUILTIN_CURRENT_SEQ,
321            "current_seq",
322            builtins::run_current_seq,
323        );
324        register(
325            registry,
326            self.index.clone(),
327            builtins::BUILTIN_CHANGES_SINCE,
328            "changes_since",
329            builtins::run_changes_since,
330        );
331        register(
332            registry,
333            self.index.clone(),
334            builtins::BUILTIN_VERSION_RECORD,
335            "version_record",
336            builtins::run_version_record,
337        );
338
339        // Agent registry + locks.
340        register(
341            registry,
342            self.index.clone(),
343            builtins::BUILTIN_AGENT_REGISTER,
344            "agent_register",
345            builtins::run_agent_register,
346        );
347        register(
348            registry,
349            self.index.clone(),
350            builtins::BUILTIN_AGENT_HEARTBEAT,
351            "agent_heartbeat",
352            builtins::run_agent_heartbeat,
353        );
354        register(
355            registry,
356            self.index.clone(),
357            builtins::BUILTIN_AGENT_UNREGISTER,
358            "agent_unregister",
359            builtins::run_agent_unregister,
360        );
361        register(
362            registry,
363            self.index.clone(),
364            builtins::BUILTIN_LOCK_TRY,
365            "lock_try",
366            builtins::run_lock_try,
367        );
368        register(
369            registry,
370            self.index.clone(),
371            builtins::BUILTIN_LOCK_RELEASE,
372            "lock_release",
373            builtins::run_lock_release,
374        );
375        register(
376            registry,
377            self.index.clone(),
378            builtins::BUILTIN_STATUS,
379            "status",
380            builtins::run_status,
381        );
382
383        // `current_agent_id` is the only handler that reads from the
384        // capability's per-call `current_agent` slot rather than the
385        // index state, so it gets its own closure.
386        let slot = self.current_agent.clone();
387        let handler: SyncHandler =
388            Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
389        registry.register(RegisteredBuiltin {
390            name: builtins::BUILTIN_CURRENT_AGENT_ID,
391            module: "code_index",
392            method: "current_agent_id",
393            handler,
394        });
395
396        // Typed symbol graph builtins (issue #2434).
397        register(
398            registry,
399            self.index.clone(),
400            builtins::BUILTIN_CYPHER,
401            "cypher",
402            builtins::run_cypher,
403        );
404        register(
405            registry,
406            self.index.clone(),
407            builtins::BUILTIN_BRANCH_OVERLAY,
408            "branch_overlay",
409            builtins::run_branch_overlay,
410        );
411        register(
412            registry,
413            self.index.clone(),
414            builtins::BUILTIN_FRESHNESS,
415            "freshness",
416            builtins::run_freshness,
417        );
418
419        // Cross-file safe rename (issue #2508). Builds on the typed
420        // symbol graph (#2434) and routes writes through staged-fs
421        // (#1722) so all touched files succeed or none do.
422        register(
423            registry,
424            self.index.clone(),
425            rename::BUILTIN,
426            "rename_symbol",
427            rename::run,
428        );
429    }
430}
431
432/// Programmatic entry point for callers that need to compose
433/// `rename_symbol` with another hostlib capability while sharing the
434/// same in-memory code-index state.
435pub(crate) fn run_rename_symbol(
436    index: &SharedIndex,
437    args: &[VmValue],
438) -> Result<VmValue, HostlibError> {
439    rename::run(index, args)
440}
441
442fn register(
443    registry: &mut BuiltinRegistry,
444    index: SharedIndex,
445    name: &'static str,
446    method: &'static str,
447    runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
448) {
449    let captured = index;
450    let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
451    registry.register(RegisteredBuiltin {
452        name,
453        module: "code_index",
454        method,
455        handler,
456    });
457}