Skip to main content

harn_hostlib/code_index/
mod.rs

1//! Code index host capability.
2//!
3//! Deterministic trigram/word index plus live workspace state (agent
4//! registry, advisory locks, append-only version log, file id assignment,
5//! cached reads). The capability owns one [`SharedIndex`] cell per
6//! instance; cloning the capability shares state with every Harn VM that
7//! has been wired against it.
8//!
9//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
10//!
11//! ### Workspace queries (the original 5)
12//!
13//! | Builtin                          | What it does                                           |
14//! |----------------------------------|--------------------------------------------------------|
15//! | `hostlib_code_index_query`       | Trigram-accelerated literal substring search.          |
16//! | `hostlib_code_index_rebuild`     | Walk a workspace and (re)build the in-memory index.    |
17//! | `hostlib_code_index_stats`       | Count files/trigrams/words + last rebuild timestamp.   |
18//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions).  |
19//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path?     |
20//!
21//! ### Live workspace state (added in #776)
22//!
23//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
24//!   `current_agent_id`, `status`.
25//! - **Locks**: `lock_try`, `lock_release`.
26//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
27//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
28//!   `file_hash`.
29//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
30//!   `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
31//!
32//! ## Concurrency model
33//!
//! All index ops serialise through a single `Arc<Mutex<Option<IndexState>>>`
//! so the IDE editor, eval, and live agent all see one consistent view; the
//! one exception is `current_agent_id`, which reads a separate per-capability
//! agent slot. The capability is `Send + Sync` so embedders can share it
//! across threads, but the mutex still serialises actual work.
38
39mod agents;
40mod builtins;
41mod file_table;
42mod graph;
43mod imports;
44mod snapshot;
45mod state;
46mod trigram;
47mod versions;
48mod walker;
49mod words;
50
51use std::path::Path;
52use std::sync::{Arc, Mutex};
53
54use harn_vm::VmValue;
55
56use crate::error::HostlibError;
57use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
58
59pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
60pub use builtins::SharedIndex;
61pub use file_table::{FileId, IndexedFile, IndexedSymbol};
62pub use graph::DepGraph;
63pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
64pub use state::{BuildOutcome, IndexState};
65pub use trigram::TrigramIndex;
66pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
67pub use words::{WordHit, WordIndex};
68
69/// Code-index capability handle.
70///
71/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
72/// the capability shares state. The capability also threads a
73/// `current_agent_id` slot used by the `current_agent_id` host builtin —
74/// embedders update this slot from the request-handling layer so each
75/// host call surfaces the right agent identity to scripts.
76#[derive(Clone, Default)]
77pub struct CodeIndexCapability {
78    index: SharedIndex,
79    current_agent: Arc<Mutex<Option<AgentId>>>,
80}
81
82impl CodeIndexCapability {
83    /// Create a capability with an empty workspace slot. The first
84    /// `hostlib_code_index_rebuild` call populates it.
85    pub fn new() -> Self {
86        Self {
87            index: Arc::new(Mutex::new(None)),
88            current_agent: Arc::new(Mutex::new(None)),
89        }
90    }
91
92    /// Borrow the underlying shared cell. Useful for tests and embedders
93    /// that want to introspect index state without going through the
94    /// builtins.
95    pub fn shared(&self) -> SharedIndex {
96        self.index.clone()
97    }
98
99    /// Borrow the current-agent slot. Embedders bind this slot before
100    /// dispatching a host call so that `current_agent_id` returns the
101    /// right value to the script.
102    pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
103        self.current_agent.clone()
104    }
105
106    /// Convenience: set the current agent id. Returns the previous value
107    /// (so callers can restore on completion if they bind per-call).
108    pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
109        let mut guard = self.current_agent.lock().expect("current_agent poisoned");
110        std::mem::replace(&mut *guard, id)
111    }
112
113    /// Restore from a previously saved snapshot at the path returned by
114    /// [`CodeIndexSnapshot::path_for`]. After restoring, runs
115    /// [`IndexState::reap_after_recovery`] so stale agent records and
116    /// locks are dropped before the daemon serves traffic.
117    ///
118    /// Returns `true` on a successful restore, `false` if no snapshot
119    /// existed (or the format was unrecognised). Errors propagate I/O
120    /// problems verbatim so callers can decide whether to fall back to
121    /// `rebuild`.
122    pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
123        match CodeIndexSnapshot::load(workspace_root)? {
124            Some(snap) => {
125                let mut state = IndexState::from_snapshot(snap);
126                state.reap_after_recovery(state::now_unix_ms());
127                let mut guard = self.index.lock().expect("code_index mutex poisoned");
128                *guard = Some(state);
129                Ok(true)
130            }
131            None => Ok(false),
132        }
133    }
134
135    /// Persist the current in-memory state to the path returned by
136    /// [`CodeIndexSnapshot::path_for`]. Returns `Ok(false)` when the
137    /// capability is empty (nothing to save).
138    pub fn persist_to_disk(&self) -> std::io::Result<bool> {
139        let snap = {
140            let guard = self.index.lock().expect("code_index mutex poisoned");
141            guard
142                .as_ref()
143                .map(|state| (state.snapshot(), state.root.clone()))
144        };
145        match snap {
146            Some((snap, root)) => {
147                snap.save(&root)?;
148                Ok(true)
149            }
150            None => Ok(false),
151        }
152    }
153}
154
155impl HostlibCapability for CodeIndexCapability {
156    fn module_name(&self) -> &'static str {
157        "code_index"
158    }
159
160    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
161        // Workspace queries (original 5).
162        register(
163            registry,
164            self.index.clone(),
165            builtins::BUILTIN_QUERY,
166            "query",
167            builtins::run_query,
168        );
169        register(
170            registry,
171            self.index.clone(),
172            builtins::BUILTIN_REBUILD,
173            "rebuild",
174            builtins::run_rebuild,
175        );
176        register(
177            registry,
178            self.index.clone(),
179            builtins::BUILTIN_STATS,
180            "stats",
181            builtins::run_stats,
182        );
183        register(
184            registry,
185            self.index.clone(),
186            builtins::BUILTIN_IMPORTS_FOR,
187            "imports_for",
188            builtins::run_imports_for,
189        );
190        register(
191            registry,
192            self.index.clone(),
193            builtins::BUILTIN_IMPORTERS_OF,
194            "importers_of",
195            builtins::run_importers_of,
196        );
197
198        // File table accessors.
199        register(
200            registry,
201            self.index.clone(),
202            builtins::BUILTIN_PATH_TO_ID,
203            "path_to_id",
204            builtins::run_path_to_id,
205        );
206        register(
207            registry,
208            self.index.clone(),
209            builtins::BUILTIN_ID_TO_PATH,
210            "id_to_path",
211            builtins::run_id_to_path,
212        );
213        register(
214            registry,
215            self.index.clone(),
216            builtins::BUILTIN_FILE_IDS,
217            "file_ids",
218            builtins::run_file_ids,
219        );
220        register(
221            registry,
222            self.index.clone(),
223            builtins::BUILTIN_FILE_META,
224            "file_meta",
225            builtins::run_file_meta,
226        );
227        register(
228            registry,
229            self.index.clone(),
230            builtins::BUILTIN_FILE_HASH,
231            "file_hash",
232            builtins::run_file_hash,
233        );
234
235        // Cached read paths.
236        register(
237            registry,
238            self.index.clone(),
239            builtins::BUILTIN_READ_RANGE,
240            "read_range",
241            builtins::run_read_range,
242        );
243        register(
244            registry,
245            self.index.clone(),
246            builtins::BUILTIN_REINDEX_FILE,
247            "reindex_file",
248            builtins::run_reindex_file,
249        );
250        register(
251            registry,
252            self.index.clone(),
253            builtins::BUILTIN_TRIGRAM_QUERY,
254            "trigram_query",
255            builtins::run_trigram_query,
256        );
257        register(
258            registry,
259            self.index.clone(),
260            builtins::BUILTIN_EXTRACT_TRIGRAMS,
261            "extract_trigrams",
262            builtins::run_extract_trigrams,
263        );
264        register(
265            registry,
266            self.index.clone(),
267            builtins::BUILTIN_WORD_GET,
268            "word_get",
269            builtins::run_word_get,
270        );
271        register(
272            registry,
273            self.index.clone(),
274            builtins::BUILTIN_DEPS_GET,
275            "deps_get",
276            builtins::run_deps_get,
277        );
278        register(
279            registry,
280            self.index.clone(),
281            builtins::BUILTIN_OUTLINE_GET,
282            "outline_get",
283            builtins::run_outline_get,
284        );
285
286        // Change log.
287        register(
288            registry,
289            self.index.clone(),
290            builtins::BUILTIN_CURRENT_SEQ,
291            "current_seq",
292            builtins::run_current_seq,
293        );
294        register(
295            registry,
296            self.index.clone(),
297            builtins::BUILTIN_CHANGES_SINCE,
298            "changes_since",
299            builtins::run_changes_since,
300        );
301        register(
302            registry,
303            self.index.clone(),
304            builtins::BUILTIN_VERSION_RECORD,
305            "version_record",
306            builtins::run_version_record,
307        );
308
309        // Agent registry + locks.
310        register(
311            registry,
312            self.index.clone(),
313            builtins::BUILTIN_AGENT_REGISTER,
314            "agent_register",
315            builtins::run_agent_register,
316        );
317        register(
318            registry,
319            self.index.clone(),
320            builtins::BUILTIN_AGENT_HEARTBEAT,
321            "agent_heartbeat",
322            builtins::run_agent_heartbeat,
323        );
324        register(
325            registry,
326            self.index.clone(),
327            builtins::BUILTIN_AGENT_UNREGISTER,
328            "agent_unregister",
329            builtins::run_agent_unregister,
330        );
331        register(
332            registry,
333            self.index.clone(),
334            builtins::BUILTIN_LOCK_TRY,
335            "lock_try",
336            builtins::run_lock_try,
337        );
338        register(
339            registry,
340            self.index.clone(),
341            builtins::BUILTIN_LOCK_RELEASE,
342            "lock_release",
343            builtins::run_lock_release,
344        );
345        register(
346            registry,
347            self.index.clone(),
348            builtins::BUILTIN_STATUS,
349            "status",
350            builtins::run_status,
351        );
352
353        // `current_agent_id` is the only handler that reads from the
354        // capability's per-call `current_agent` slot rather than the
355        // index state, so it gets its own closure.
356        let slot = self.current_agent.clone();
357        let handler: SyncHandler =
358            Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
359        registry.register(RegisteredBuiltin {
360            name: builtins::BUILTIN_CURRENT_AGENT_ID,
361            module: "code_index",
362            method: "current_agent_id",
363            handler,
364        });
365    }
366}
367
368fn register(
369    registry: &mut BuiltinRegistry,
370    index: SharedIndex,
371    name: &'static str,
372    method: &'static str,
373    runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
374) {
375    let captured = index;
376    let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
377    registry.register(RegisteredBuiltin {
378        name,
379        module: "code_index",
380        method,
381        handler,
382    });
383}