Skip to main content

harn_hostlib/code_index/
mod.rs

1//! Code index host capability.
2//!
3//! Deterministic trigram/word index plus live workspace state (agent
4//! registry, advisory locks, append-only version log, file id assignment,
5//! cached reads). The capability owns one [`SharedIndex`] cell per
6//! instance; cloning the capability shares state with every Harn VM that
7//! has been wired against it.
8//!
9//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
10//!
11//! ### Workspace queries (the original 5)
12//!
13//! | Builtin                          | What it does                                           |
14//! |----------------------------------|--------------------------------------------------------|
15//! | `hostlib_code_index_query`       | Trigram-accelerated literal substring search.          |
16//! | `hostlib_code_index_rebuild`     | Walk a workspace and (re)build the in-memory index.    |
17//! | `hostlib_code_index_stats`       | Count files/trigrams/words + last rebuild timestamp.   |
18//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions).  |
19//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path?     |
20//!
21//! ### Live workspace state (added in #776)
22//!
23//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
24//!   `current_agent_id`, `status`.
25//! - **Locks**: `lock_try`, `lock_release`.
26//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
27//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
28//!   `file_hash`.
29//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
30//!   `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
31//!
32//! ### Typed symbol graph (added in #2434)
33//!
34//! - **`cypher`**: read-only Cypher executor over the typed graph
35//!   ([`SymbolGraph`]) — `MATCH ... WHERE ... RETURN` with typed
36//!   nodes (Function|Type|Module|Import|CallSite|Macro), typed edges
37//!   (CALLS|REFS|IMPORTS|CONTAINS|OVERRIDES, plus `_BY` inverses),
38//!   and variable-length hops up to depth 4.
39//! - **`branch_overlay`**: per-branch CDC overlay that layers a delta
40//!   on top of the base graph; reuses ≥95% of the main index in
41//!   storage/CPU for untouched files. See [`BranchOverlay`].
42//! - **`freshness`**: per-file hash + mtime comparison against the
43//!   indexed snapshot; consumers detect staleness without forcing a
44//!   rebuild.
45//!
46//! ## Concurrency model
47//!
//! All index-backed ops serialise through a single
//! `Arc<Mutex<Option<IndexState>>>` so the IDE editor, eval, and live agent
//! all see one consistent view; the one exception is `current_agent_id`,
//! which reads the capability's separate current-agent slot. The
//! capability is `Send + Sync` so embedders can share it across threads,
//! but the mutex still serialises actual work.
52
53mod agents;
54mod builtins;
55mod cypher;
56mod file_table;
57mod graph;
58mod imports;
59mod overlay;
60mod snapshot;
61mod state;
62mod symbol_graph;
63mod trigram;
64mod versions;
65mod walker;
66mod words;
67
68use std::path::Path;
69use std::sync::{Arc, Mutex};
70
71use harn_vm::VmValue;
72
73use crate::error::HostlibError;
74use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
75
76pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
77pub use builtins::SharedIndex;
78pub use cypher::{CypherError, CypherRow, CypherValue};
79pub use file_table::{FileId, IndexedFile, IndexedSymbol};
80pub use graph::DepGraph;
81pub use overlay::{BranchOverlay, OverlayState};
82pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
83pub use state::{BuildOutcome, IndexState};
84pub use symbol_graph::{Edge, EdgeKind, Node, NodeId, NodeKind, SymbolGraph};
85pub use trigram::TrigramIndex;
86pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
87pub use words::{WordHit, WordIndex};
88
89/// Code-index capability handle.
90///
91/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
92/// the capability shares state. The capability also threads a
93/// `current_agent_id` slot used by the `current_agent_id` host builtin —
94/// embedders update this slot from the request-handling layer so each
95/// host call surfaces the right agent identity to scripts.
96#[derive(Clone, Default)]
97pub struct CodeIndexCapability {
98    index: SharedIndex,
99    current_agent: Arc<Mutex<Option<AgentId>>>,
100}
101
102impl CodeIndexCapability {
103    /// Create a capability with an empty workspace slot. The first
104    /// `hostlib_code_index_rebuild` call populates it.
105    pub fn new() -> Self {
106        Self {
107            index: Arc::new(Mutex::new(None)),
108            current_agent: Arc::new(Mutex::new(None)),
109        }
110    }
111
112    /// Borrow the underlying shared cell. Useful for tests and embedders
113    /// that want to introspect index state without going through the
114    /// builtins.
115    pub fn shared(&self) -> SharedIndex {
116        self.index.clone()
117    }
118
119    /// Borrow the current-agent slot. Embedders bind this slot before
120    /// dispatching a host call so that `current_agent_id` returns the
121    /// right value to the script.
122    pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
123        self.current_agent.clone()
124    }
125
126    /// Convenience: set the current agent id. Returns the previous value
127    /// (so callers can restore on completion if they bind per-call).
128    pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
129        let mut guard = self.current_agent.lock().expect("current_agent poisoned");
130        std::mem::replace(&mut *guard, id)
131    }
132
133    /// Restore from a previously saved snapshot at the path returned by
134    /// [`CodeIndexSnapshot::path_for`]. After restoring, runs
135    /// [`IndexState::reap_after_recovery`] so stale agent records and
136    /// locks are dropped before the daemon serves traffic.
137    ///
138    /// Returns `true` on a successful restore, `false` if no snapshot
139    /// existed (or the format was unrecognised). Errors propagate I/O
140    /// problems verbatim so callers can decide whether to fall back to
141    /// `rebuild`.
142    pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
143        match CodeIndexSnapshot::load(workspace_root)? {
144            Some(snap) => {
145                let mut state = IndexState::from_snapshot(snap);
146                state.reap_after_recovery(state::now_unix_ms());
147                let mut guard = self.index.lock().expect("code_index mutex poisoned");
148                *guard = Some(state);
149                Ok(true)
150            }
151            None => Ok(false),
152        }
153    }
154
155    /// Persist the current in-memory state to the path returned by
156    /// [`CodeIndexSnapshot::path_for`]. Returns `Ok(false)` when the
157    /// capability is empty (nothing to save).
158    pub fn persist_to_disk(&self) -> std::io::Result<bool> {
159        let snap = {
160            let guard = self.index.lock().expect("code_index mutex poisoned");
161            guard
162                .as_ref()
163                .map(|state| (state.snapshot(), state.root.clone()))
164        };
165        match snap {
166            Some((snap, root)) => {
167                snap.save(&root)?;
168                Ok(true)
169            }
170            None => Ok(false),
171        }
172    }
173}
174
175impl HostlibCapability for CodeIndexCapability {
176    fn module_name(&self) -> &'static str {
177        "code_index"
178    }
179
180    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
181        // Workspace queries (original 5).
182        register(
183            registry,
184            self.index.clone(),
185            builtins::BUILTIN_QUERY,
186            "query",
187            builtins::run_query,
188        );
189        register(
190            registry,
191            self.index.clone(),
192            builtins::BUILTIN_REBUILD,
193            "rebuild",
194            builtins::run_rebuild,
195        );
196        register(
197            registry,
198            self.index.clone(),
199            builtins::BUILTIN_STATS,
200            "stats",
201            builtins::run_stats,
202        );
203        register(
204            registry,
205            self.index.clone(),
206            builtins::BUILTIN_IMPORTS_FOR,
207            "imports_for",
208            builtins::run_imports_for,
209        );
210        register(
211            registry,
212            self.index.clone(),
213            builtins::BUILTIN_IMPORTERS_OF,
214            "importers_of",
215            builtins::run_importers_of,
216        );
217
218        // File table accessors.
219        register(
220            registry,
221            self.index.clone(),
222            builtins::BUILTIN_PATH_TO_ID,
223            "path_to_id",
224            builtins::run_path_to_id,
225        );
226        register(
227            registry,
228            self.index.clone(),
229            builtins::BUILTIN_ID_TO_PATH,
230            "id_to_path",
231            builtins::run_id_to_path,
232        );
233        register(
234            registry,
235            self.index.clone(),
236            builtins::BUILTIN_FILE_IDS,
237            "file_ids",
238            builtins::run_file_ids,
239        );
240        register(
241            registry,
242            self.index.clone(),
243            builtins::BUILTIN_FILE_META,
244            "file_meta",
245            builtins::run_file_meta,
246        );
247        register(
248            registry,
249            self.index.clone(),
250            builtins::BUILTIN_FILE_HASH,
251            "file_hash",
252            builtins::run_file_hash,
253        );
254
255        // Cached read paths.
256        register(
257            registry,
258            self.index.clone(),
259            builtins::BUILTIN_READ_RANGE,
260            "read_range",
261            builtins::run_read_range,
262        );
263        register(
264            registry,
265            self.index.clone(),
266            builtins::BUILTIN_REINDEX_FILE,
267            "reindex_file",
268            builtins::run_reindex_file,
269        );
270        register(
271            registry,
272            self.index.clone(),
273            builtins::BUILTIN_TRIGRAM_QUERY,
274            "trigram_query",
275            builtins::run_trigram_query,
276        );
277        register(
278            registry,
279            self.index.clone(),
280            builtins::BUILTIN_EXTRACT_TRIGRAMS,
281            "extract_trigrams",
282            builtins::run_extract_trigrams,
283        );
284        register(
285            registry,
286            self.index.clone(),
287            builtins::BUILTIN_WORD_GET,
288            "word_get",
289            builtins::run_word_get,
290        );
291        register(
292            registry,
293            self.index.clone(),
294            builtins::BUILTIN_DEPS_GET,
295            "deps_get",
296            builtins::run_deps_get,
297        );
298        register(
299            registry,
300            self.index.clone(),
301            builtins::BUILTIN_OUTLINE_GET,
302            "outline_get",
303            builtins::run_outline_get,
304        );
305
306        // Change log.
307        register(
308            registry,
309            self.index.clone(),
310            builtins::BUILTIN_CURRENT_SEQ,
311            "current_seq",
312            builtins::run_current_seq,
313        );
314        register(
315            registry,
316            self.index.clone(),
317            builtins::BUILTIN_CHANGES_SINCE,
318            "changes_since",
319            builtins::run_changes_since,
320        );
321        register(
322            registry,
323            self.index.clone(),
324            builtins::BUILTIN_VERSION_RECORD,
325            "version_record",
326            builtins::run_version_record,
327        );
328
329        // Agent registry + locks.
330        register(
331            registry,
332            self.index.clone(),
333            builtins::BUILTIN_AGENT_REGISTER,
334            "agent_register",
335            builtins::run_agent_register,
336        );
337        register(
338            registry,
339            self.index.clone(),
340            builtins::BUILTIN_AGENT_HEARTBEAT,
341            "agent_heartbeat",
342            builtins::run_agent_heartbeat,
343        );
344        register(
345            registry,
346            self.index.clone(),
347            builtins::BUILTIN_AGENT_UNREGISTER,
348            "agent_unregister",
349            builtins::run_agent_unregister,
350        );
351        register(
352            registry,
353            self.index.clone(),
354            builtins::BUILTIN_LOCK_TRY,
355            "lock_try",
356            builtins::run_lock_try,
357        );
358        register(
359            registry,
360            self.index.clone(),
361            builtins::BUILTIN_LOCK_RELEASE,
362            "lock_release",
363            builtins::run_lock_release,
364        );
365        register(
366            registry,
367            self.index.clone(),
368            builtins::BUILTIN_STATUS,
369            "status",
370            builtins::run_status,
371        );
372
373        // `current_agent_id` is the only handler that reads from the
374        // capability's per-call `current_agent` slot rather than the
375        // index state, so it gets its own closure.
376        let slot = self.current_agent.clone();
377        let handler: SyncHandler =
378            Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
379        registry.register(RegisteredBuiltin {
380            name: builtins::BUILTIN_CURRENT_AGENT_ID,
381            module: "code_index",
382            method: "current_agent_id",
383            handler,
384        });
385
386        // Typed symbol graph builtins (issue #2434).
387        register(
388            registry,
389            self.index.clone(),
390            builtins::BUILTIN_CYPHER,
391            "cypher",
392            builtins::run_cypher,
393        );
394        register(
395            registry,
396            self.index.clone(),
397            builtins::BUILTIN_BRANCH_OVERLAY,
398            "branch_overlay",
399            builtins::run_branch_overlay,
400        );
401        register(
402            registry,
403            self.index.clone(),
404            builtins::BUILTIN_FRESHNESS,
405            "freshness",
406            builtins::run_freshness,
407        );
408    }
409}
410
411fn register(
412    registry: &mut BuiltinRegistry,
413    index: SharedIndex,
414    name: &'static str,
415    method: &'static str,
416    runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
417) {
418    let captured = index;
419    let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
420    registry.register(RegisteredBuiltin {
421        name,
422        module: "code_index",
423        method,
424        handler,
425    });
426}