Skip to main content

harn_hostlib/code_index/
mod.rs

1//! Code index host capability.
2//!
3//! Ports the deterministic trigram/word index plus the live workspace
4//! state (agent registry, advisory locks, append-only version log, file
5//! id assignment, cached reads) that previously lived in
6//! `Sources/BurinCodeIndex/` on the Swift side. The capability owns one
7//! [`SharedIndex`] cell per instance; cloning the capability shares
8//! state with every Harn VM that has been wired against it.
9//!
10//! Surface — every builtin is locked by `schemas/code_index/<method>.json`:
11//!
12//! ### Workspace queries (the original 5)
13//!
14//! | Builtin                          | What it does                                           |
15//! |----------------------------------|--------------------------------------------------------|
16//! | `hostlib_code_index_query`       | Trigram-accelerated literal substring search.          |
17//! | `hostlib_code_index_rebuild`     | Walk a workspace and (re)build the in-memory index.    |
18//! | `hostlib_code_index_stats`       | Count files/trigrams/words + last rebuild timestamp.   |
19//! | `hostlib_code_index_imports_for` | Imports declared by a single file (with resolutions).  |
20//! | `hostlib_code_index_importers_of`| Reverse lookup: who imports the given module/path?     |
21//!
22//! ### Live workspace state (added in #776)
23//!
24//! - **Agents**: `agent_register`, `agent_heartbeat`, `agent_unregister`,
25//!   `current_agent_id`, `status`.
26//! - **Locks**: `lock_try`, `lock_release`.
27//! - **Change log**: `current_seq`, `changes_since`, `version_record`.
28//! - **File table**: `path_to_id`, `id_to_path`, `file_ids`, `file_meta`,
29//!   `file_hash`.
30//! - **Cached reads**: `read_range`, `reindex_file`, `trigram_query`,
31//!   `extract_trigrams`, `word_get`, `deps_get`, `outline_get`.
32//!
33//! ## Concurrency model
34//!
//! Every index-backed op serialises through a single
//! `Arc<Mutex<Option<IndexState>>>`. That matches the Swift actor: the IDE
//! editor, eval, and live agent all see one consistent view. The one
//! exception is `current_agent_id`, which reads a separate
//! `Arc<Mutex<Option<AgentId>>>` slot held by the capability. The
//! capability is `Send + Sync` so embedders can share it across threads,
//! but the mutexes still serialise the actual work.
40
41mod agents;
42mod builtins;
43mod file_table;
44mod graph;
45mod imports;
46mod snapshot;
47mod state;
48mod trigram;
49mod versions;
50mod walker;
51mod words;
52
53use std::path::Path;
54use std::sync::{Arc, Mutex};
55
56use harn_vm::VmValue;
57
58use crate::error::HostlibError;
59use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
60
61pub use agents::{AgentId, AgentInfo, AgentRegistry, AgentState, RegistryConfig};
62pub use builtins::SharedIndex;
63pub use file_table::{FileId, IndexedFile, IndexedSymbol};
64pub use graph::DepGraph;
65pub use snapshot::{CodeIndexSnapshot, SnapshotMeta};
66pub use state::{BuildOutcome, IndexState};
67pub use trigram::TrigramIndex;
68pub use versions::{ChangeRecord, EditOp, VersionEntry, VersionLog, HISTORY_LIMIT};
69pub use words::{WordHit, WordIndex};
70
71/// Code-index capability handle.
72///
73/// Holds the [`SharedIndex`] cell behind an `Arc<Mutex<...>>`; cloning
74/// the capability shares state. The capability also threads a
75/// `current_agent_id` slot used by the `current_agent_id` host builtin —
76/// embedders update this slot from the request-handling layer so each
77/// host call surfaces the right agent identity to scripts.
78#[derive(Clone, Default)]
79pub struct CodeIndexCapability {
80    index: SharedIndex,
81    current_agent: Arc<Mutex<Option<AgentId>>>,
82}
83
84impl CodeIndexCapability {
85    /// Create a capability with an empty workspace slot. The first
86    /// `hostlib_code_index_rebuild` call populates it.
87    pub fn new() -> Self {
88        Self {
89            index: Arc::new(Mutex::new(None)),
90            current_agent: Arc::new(Mutex::new(None)),
91        }
92    }
93
94    /// Borrow the underlying shared cell. Useful for tests and embedders
95    /// that want to introspect index state without going through the
96    /// builtins.
97    pub fn shared(&self) -> SharedIndex {
98        self.index.clone()
99    }
100
101    /// Borrow the current-agent slot. Embedders bind this slot before
102    /// dispatching a host call so that `current_agent_id` returns the
103    /// right value to the script.
104    pub fn current_agent_slot(&self) -> Arc<Mutex<Option<AgentId>>> {
105        self.current_agent.clone()
106    }
107
108    /// Convenience: set the current agent id. Returns the previous value
109    /// (so callers can restore on completion if they bind per-call).
110    pub fn set_current_agent(&self, id: Option<AgentId>) -> Option<AgentId> {
111        let mut guard = self.current_agent.lock().expect("current_agent poisoned");
112        std::mem::replace(&mut *guard, id)
113    }
114
115    /// Restore from a previously saved snapshot at
116    /// `<root>/.burin/index/snapshot.json`. After restoring, runs
117    /// [`IndexState::reap_after_recovery`] so stale agent records and
118    /// locks are dropped before the daemon serves traffic.
119    ///
120    /// Returns `true` on a successful restore, `false` if no snapshot
121    /// existed (or the format was unrecognised). Errors propagate I/O
122    /// problems verbatim so callers can decide whether to fall back to
123    /// `rebuild`.
124    pub fn restore_from_disk(&self, workspace_root: &Path) -> std::io::Result<bool> {
125        match CodeIndexSnapshot::load(workspace_root)? {
126            Some(snap) => {
127                let mut state = IndexState::from_snapshot(snap);
128                state.reap_after_recovery(state::now_unix_ms());
129                let mut guard = self.index.lock().expect("code_index mutex poisoned");
130                *guard = Some(state);
131                Ok(true)
132            }
133            None => Ok(false),
134        }
135    }
136
137    /// Persist the current in-memory state to
138    /// `<root>/.burin/index/snapshot.json`. Returns `Ok(false)` when the
139    /// capability is empty (nothing to save).
140    pub fn persist_to_disk(&self) -> std::io::Result<bool> {
141        let snap = {
142            let guard = self.index.lock().expect("code_index mutex poisoned");
143            guard
144                .as_ref()
145                .map(|state| (state.snapshot(), state.root.clone()))
146        };
147        match snap {
148            Some((snap, root)) => {
149                snap.save(&root)?;
150                Ok(true)
151            }
152            None => Ok(false),
153        }
154    }
155}
156
157impl HostlibCapability for CodeIndexCapability {
158    fn module_name(&self) -> &'static str {
159        "code_index"
160    }
161
162    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
163        // Workspace queries (original 5).
164        register(
165            registry,
166            self.index.clone(),
167            builtins::BUILTIN_QUERY,
168            "query",
169            builtins::run_query,
170        );
171        register(
172            registry,
173            self.index.clone(),
174            builtins::BUILTIN_REBUILD,
175            "rebuild",
176            builtins::run_rebuild,
177        );
178        register(
179            registry,
180            self.index.clone(),
181            builtins::BUILTIN_STATS,
182            "stats",
183            builtins::run_stats,
184        );
185        register(
186            registry,
187            self.index.clone(),
188            builtins::BUILTIN_IMPORTS_FOR,
189            "imports_for",
190            builtins::run_imports_for,
191        );
192        register(
193            registry,
194            self.index.clone(),
195            builtins::BUILTIN_IMPORTERS_OF,
196            "importers_of",
197            builtins::run_importers_of,
198        );
199
200        // File table accessors.
201        register(
202            registry,
203            self.index.clone(),
204            builtins::BUILTIN_PATH_TO_ID,
205            "path_to_id",
206            builtins::run_path_to_id,
207        );
208        register(
209            registry,
210            self.index.clone(),
211            builtins::BUILTIN_ID_TO_PATH,
212            "id_to_path",
213            builtins::run_id_to_path,
214        );
215        register(
216            registry,
217            self.index.clone(),
218            builtins::BUILTIN_FILE_IDS,
219            "file_ids",
220            builtins::run_file_ids,
221        );
222        register(
223            registry,
224            self.index.clone(),
225            builtins::BUILTIN_FILE_META,
226            "file_meta",
227            builtins::run_file_meta,
228        );
229        register(
230            registry,
231            self.index.clone(),
232            builtins::BUILTIN_FILE_HASH,
233            "file_hash",
234            builtins::run_file_hash,
235        );
236
237        // Cached read paths.
238        register(
239            registry,
240            self.index.clone(),
241            builtins::BUILTIN_READ_RANGE,
242            "read_range",
243            builtins::run_read_range,
244        );
245        register(
246            registry,
247            self.index.clone(),
248            builtins::BUILTIN_REINDEX_FILE,
249            "reindex_file",
250            builtins::run_reindex_file,
251        );
252        register(
253            registry,
254            self.index.clone(),
255            builtins::BUILTIN_TRIGRAM_QUERY,
256            "trigram_query",
257            builtins::run_trigram_query,
258        );
259        register(
260            registry,
261            self.index.clone(),
262            builtins::BUILTIN_EXTRACT_TRIGRAMS,
263            "extract_trigrams",
264            builtins::run_extract_trigrams,
265        );
266        register(
267            registry,
268            self.index.clone(),
269            builtins::BUILTIN_WORD_GET,
270            "word_get",
271            builtins::run_word_get,
272        );
273        register(
274            registry,
275            self.index.clone(),
276            builtins::BUILTIN_DEPS_GET,
277            "deps_get",
278            builtins::run_deps_get,
279        );
280        register(
281            registry,
282            self.index.clone(),
283            builtins::BUILTIN_OUTLINE_GET,
284            "outline_get",
285            builtins::run_outline_get,
286        );
287
288        // Change log.
289        register(
290            registry,
291            self.index.clone(),
292            builtins::BUILTIN_CURRENT_SEQ,
293            "current_seq",
294            builtins::run_current_seq,
295        );
296        register(
297            registry,
298            self.index.clone(),
299            builtins::BUILTIN_CHANGES_SINCE,
300            "changes_since",
301            builtins::run_changes_since,
302        );
303        register(
304            registry,
305            self.index.clone(),
306            builtins::BUILTIN_VERSION_RECORD,
307            "version_record",
308            builtins::run_version_record,
309        );
310
311        // Agent registry + locks.
312        register(
313            registry,
314            self.index.clone(),
315            builtins::BUILTIN_AGENT_REGISTER,
316            "agent_register",
317            builtins::run_agent_register,
318        );
319        register(
320            registry,
321            self.index.clone(),
322            builtins::BUILTIN_AGENT_HEARTBEAT,
323            "agent_heartbeat",
324            builtins::run_agent_heartbeat,
325        );
326        register(
327            registry,
328            self.index.clone(),
329            builtins::BUILTIN_AGENT_UNREGISTER,
330            "agent_unregister",
331            builtins::run_agent_unregister,
332        );
333        register(
334            registry,
335            self.index.clone(),
336            builtins::BUILTIN_LOCK_TRY,
337            "lock_try",
338            builtins::run_lock_try,
339        );
340        register(
341            registry,
342            self.index.clone(),
343            builtins::BUILTIN_LOCK_RELEASE,
344            "lock_release",
345            builtins::run_lock_release,
346        );
347        register(
348            registry,
349            self.index.clone(),
350            builtins::BUILTIN_STATUS,
351            "status",
352            builtins::run_status,
353        );
354
355        // `current_agent_id` is the only handler that reads from the
356        // capability's per-call `current_agent` slot rather than the
357        // index state, so it gets its own closure.
358        let slot = self.current_agent.clone();
359        let handler: SyncHandler =
360            Arc::new(move |args| builtins::run_current_agent_id(&slot, args));
361        registry.register(RegisteredBuiltin {
362            name: builtins::BUILTIN_CURRENT_AGENT_ID,
363            module: "code_index",
364            method: "current_agent_id",
365            handler,
366        });
367    }
368}
369
370fn register(
371    registry: &mut BuiltinRegistry,
372    index: SharedIndex,
373    name: &'static str,
374    method: &'static str,
375    runner: fn(&SharedIndex, &[VmValue]) -> Result<VmValue, HostlibError>,
376) {
377    let captured = index;
378    let handler: SyncHandler = Arc::new(move |args| runner(&captured, args));
379    registry.register(RegisteredBuiltin {
380        name,
381        module: "code_index",
382        method,
383        handler,
384    });
385}