harn_hostlib/code_index/readonly.rs
1//! Additive, read-only secondary roots for the code index (issue #2403
2//! follow-up).
3//!
4//! The primary [`IndexState`] (built by `hostlib_code_index_rebuild`) owns
5//! exactly one writable workspace root and flips its slot wholesale on
6//! every rebuild. That is correct for the project under edit, but it means
7//! a caller cannot also index *dependency* / SDK roots (e.g. the macOS
8//! IOKit headers that declare `kIOPSTimeToFullChargeKey`) without
9//! clobbering the project index.
10//!
11//! This module adds a parallel, **read-only** set of secondary
12//! [`IndexState`]s that live beside the primary in the capability. They are
13//! merged into path-based query results so library API symbols become
14//! discoverable, while staying entirely out of the project's writable
15//! scope:
16//!
17//! - They are never mutated by `version_record`, `reindex_file`,
18//! `agent_*`, `lock_*`, or `rename_symbol` — those builtins only ever
19//! touch the primary slot.
20//! - `read_range` will resolve a path inside a read-only root (so a symbol
21//! discovered there can be read), but every *write* path (rename,
22//! reindex, version log, locks) still rejects out-of-project paths
23//! exactly as before, because those paths are not in the primary index.
24//!
25//! Concurrency mirrors the primary: a single `Arc<Mutex<Vec<IndexState>>>`
26//! cell, so the same serialised view is shared by every Harn VM wired
27//! against the capability.
28
29use std::path::PathBuf;
30use std::sync::{Arc, Mutex};
31
32use harn_vm::VmValue;
33
34use super::builtins::SharedIndex;
35use super::state::IndexState;
36use crate::error::HostlibError;
37use crate::tools::args::{build_dict, dict_arg, optional_bool, optional_string_list, str_value};
38
/// Shared cell holding the read-only secondary indexes. Each entry is a
/// fully-built [`IndexState`] anchored at one dependency root. Empty until
/// `hostlib_code_index_add_readonly_roots` is called.
///
/// The `Vec` sits behind a single `Mutex`, so every reader and writer in
/// this module takes the same lock before touching the set.
pub type ReadonlyRoots = Arc<Mutex<Vec<IndexState>>>;

/// Name of the builtin implemented by [`run_add_readonly_roots`]. Passed to
/// the argument helpers and stored in the `builtin` field of any
/// `HostlibError::InvalidParameter` that function returns.
pub(super) const BUILTIN_ADD_READONLY_ROOTS: &str = "hostlib_code_index_add_readonly_roots";
45
46/// Build and merge one or more dependency roots into the read-only set.
47///
48/// Idempotent per root: re-adding a root that is already present rebuilds
49/// that entry in place rather than appending a duplicate. `replace: true`
50/// clears the existing set first (so a caller can swap the whole dependency
51/// fan-out without accumulating stale roots).
52pub(super) fn run_add_readonly_roots(
53 readonly: &ReadonlyRoots,
54 args: &[VmValue],
55) -> Result<VmValue, HostlibError> {
56 let raw = dict_arg(BUILTIN_ADD_READONLY_ROOTS, args)?;
57 let dict = raw.as_ref();
58 let roots = optional_string_list(BUILTIN_ADD_READONLY_ROOTS, dict, "roots")?;
59 let replace = optional_bool(BUILTIN_ADD_READONLY_ROOTS, dict, "replace", false)?;
60
61 let mut guard = readonly.lock().expect("readonly roots mutex poisoned");
62 if replace {
63 guard.clear();
64 }
65
66 let mut added: Vec<VmValue> = Vec::with_capacity(roots.len());
67 let mut total_files: usize = 0;
68 for raw_root in roots {
69 let root = PathBuf::from(&raw_root);
70 if !root.exists() {
71 return Err(HostlibError::InvalidParameter {
72 builtin: BUILTIN_ADD_READONLY_ROOTS,
73 param: "roots",
74 message: format!("path `{}` does not exist", root.display()),
75 });
76 }
77 if !root.is_dir() {
78 return Err(HostlibError::InvalidParameter {
79 builtin: BUILTIN_ADD_READONLY_ROOTS,
80 param: "roots",
81 message: format!("path `{}` is not a directory", root.display()),
82 });
83 }
84 let (state, outcome) = IndexState::build_from_root(&root);
85 let files_indexed = outcome.files_indexed as i64;
86 total_files += state.files.len();
87 // Idempotent: replace any existing entry for the same canonical
88 // root rather than appending a duplicate.
89 let canonical = state.root.clone();
90 if let Some(slot) = guard.iter_mut().find(|s| s.root == canonical) {
91 *slot = state;
92 } else {
93 guard.push(state);
94 }
95 added.push(build_dict([
96 ("root", str_value(canonical.to_string_lossy().as_ref())),
97 ("files_indexed", VmValue::Int(files_indexed)),
98 ]));
99 }
100
101 Ok(build_dict([
102 ("roots", VmValue::List(Arc::new(added))),
103 ("readonly_root_count", VmValue::Int(guard.len() as i64)),
104 ("readonly_files_indexed", VmValue::Int(total_files as i64)),
105 ]))
106}
107
108/// Resolve `rel_or_abs` against the primary index first, then fall back to
109/// every read-only secondary root. Returns the canonical absolute path of
110/// the first index that contains the file. Used by `read_range` so a symbol
111/// discovered in a dependency root can be read back.
112pub(super) fn resolve_read_path(
113 primary: &SharedIndex,
114 readonly: &ReadonlyRoots,
115 path: &str,
116) -> Option<PathBuf> {
117 // Resolution order, preserving the pre-#2403 contract:
118 // 1. An *existing* file inside the primary workspace root.
119 // 2. An *existing* file inside any read-only dependency root.
120 // 3. As a fallback, the primary root's resolution even if the file
121 // does not exist — so a missing in-workspace path still fails at
122 // the read step with "file not found" exactly as before, rather
123 // than being rejected as out-of-scope.
124 // `absolute_path` confines every candidate to a known root and accepts
125 // not-yet-existing paths (it is shared with write paths), so the
126 // existence filter is what lets a dependency-only path win over the
127 // phantom project path it would otherwise resolve to.
128 let (primary_resolved, primary_built) = {
129 let guard = primary.lock().expect("code_index mutex poisoned");
130 (
131 guard.as_ref().and_then(|state| state.absolute_path(path)),
132 guard.is_some(),
133 )
134 };
135 if let Some(abs) = primary_resolved.as_ref().filter(|p| p.exists()) {
136 return Some(abs.clone());
137 }
138 {
139 let guard = readonly.lock().expect("readonly roots mutex poisoned");
140 if let Some(abs) = guard
141 .iter()
142 .find_map(|state| state.absolute_path(path).filter(|p| p.exists()))
143 {
144 return Some(abs);
145 }
146 }
147 // When the primary index has never been built, there is no workspace
148 // root to confine `path` against, so `absolute_path` yields `None` and
149 // both lookups above miss. Pre-#3352 `read_range` handled this by
150 // reading the raw path straight off the filesystem (the old
151 // `None => PathBuf::from(&path)` arm). Restore that fallback so callers
152 // that read a path before any rebuild — e.g. `agent_run` scanning a
153 // process-output temp file to surface buried test-failure lines, and
154 // eval/verify reads over arbitrary shell output — still resolve instead
155 // of erroring "path must stay within the indexed workspace root". The
156 // read step still returns a genuine "file not found" for a missing
157 // path, so the contract is unchanged.
158 if !primary_built {
159 return Some(PathBuf::from(path));
160 }
161 primary_resolved
162}
163
164/// Run `query`-style scoring over every read-only secondary root, tagging
165/// each hit with its `root` so callers can disambiguate a dependency hit
166/// from a project hit. Returns hits already merged with whatever the
167/// primary index produced. Hits keep the same `path`/`score`/`match_count`
168/// shape as the primary query plus a `root` field; primary hits carry
169/// `root: nil`.
170pub(super) fn query_readonly_hits(
171 readonly: &ReadonlyRoots,
172 needle: &str,
173 case_sensitive: bool,
174) -> Vec<super::builtins::Hit> {
175 let guard = readonly.lock().expect("readonly roots mutex poisoned");
176 let mut hits: Vec<super::builtins::Hit> = Vec::new();
177 for state in guard.iter() {
178 super::builtins::collect_hits_into(state, needle, case_sensitive, &mut hits);
179 }
180 hits
181}