harn_hostlib/code_index/readonly.rs
1//! Additive, read-only secondary roots for the code index (issue #2403
2//! follow-up).
3//!
4//! The primary [`IndexState`] (built by `hostlib_code_index_rebuild`) owns
5//! exactly one writable workspace root and flips its slot wholesale on
6//! every rebuild. That is correct for the project under edit, but it means
7//! a caller cannot also index *dependency* / SDK roots (e.g. the macOS
8//! IOKit headers that declare `kIOPSTimeToFullChargeKey`) without
9//! clobbering the project index.
10//!
11//! This module adds a parallel, **read-only** set of secondary
12//! [`IndexState`]s that live beside the primary in the capability. They are
13//! merged into path-based query results so library API symbols become
14//! discoverable, while staying entirely out of the project's writable
15//! scope:
16//!
17//! - They are never mutated by `version_record`, `reindex_file`,
18//! `agent_*`, `lock_*`, or `rename_symbol` — those builtins only ever
19//! touch the primary slot.
20//! - `read_range` will resolve a path inside a read-only root (so a symbol
21//! discovered there can be read), but every *write* path (rename,
22//! reindex, version log, locks) still rejects out-of-project paths
23//! exactly as before, because those paths are not in the primary index.
24//!
25//! Concurrency mirrors the primary: a single `Arc<Mutex<Vec<IndexState>>>`
26//! cell, so the same serialised view is shared by every Harn VM wired
27//! against the capability.
28
29use std::path::PathBuf;
30use std::sync::{Arc, Mutex};
31
32use harn_vm::VmValue;
33
34use super::builtins::SharedIndex;
35use super::state::IndexState;
36use crate::error::HostlibError;
37use crate::tools::args::{
38 build_dict, dict_arg, optional_bool, optional_string_list, str_value, to_agent_path,
39};
40
/// Shared cell holding the read-only secondary indexes. Each entry is a
/// fully-built [`IndexState`] anchored at one dependency root. Empty until
/// `hostlib_code_index_add_readonly_roots` is called.
///
/// Entries are kept in a plain `Vec`, so lookups in this module walk them
/// in stored order and the first matching root wins. Every accessor in this
/// module takes the mutex with `expect`, i.e. a poisoned lock panics rather
/// than being recovered.
pub type ReadonlyRoots = Arc<Mutex<Vec<IndexState>>>;
45
/// Builtin name handed to the argument helpers and embedded in every
/// `HostlibError::InvalidParameter` raised by [`run_add_readonly_roots`].
// NOTE(review): presumably also the name the builtin is registered under —
// confirm against the registration site in `builtins`.
pub(super) const BUILTIN_ADD_READONLY_ROOTS: &str = "hostlib_code_index_add_readonly_roots";
47
48/// Build and merge one or more dependency roots into the read-only set.
49///
50/// Idempotent per root: re-adding a root that is already present rebuilds
51/// that entry in place rather than appending a duplicate. `replace: true`
52/// clears the existing set first (so a caller can swap the whole dependency
53/// fan-out without accumulating stale roots).
54pub(super) fn run_add_readonly_roots(
55 readonly: &ReadonlyRoots,
56 args: &[VmValue],
57) -> Result<VmValue, HostlibError> {
58 let raw = dict_arg(BUILTIN_ADD_READONLY_ROOTS, args)?;
59 let dict = raw.as_ref();
60 let roots = optional_string_list(BUILTIN_ADD_READONLY_ROOTS, dict, "roots")?;
61 let replace = optional_bool(BUILTIN_ADD_READONLY_ROOTS, dict, "replace", false)?;
62
63 let mut guard = readonly.lock().expect("readonly roots mutex poisoned");
64 if replace {
65 guard.clear();
66 }
67
68 let mut added: Vec<VmValue> = Vec::with_capacity(roots.len());
69 let mut total_files: usize = 0;
70 for raw_root in roots {
71 let root = PathBuf::from(&raw_root);
72 if !root.exists() {
73 return Err(HostlibError::InvalidParameter {
74 builtin: BUILTIN_ADD_READONLY_ROOTS,
75 param: "roots",
76 message: format!("path `{}` does not exist", root.display()),
77 });
78 }
79 if !root.is_dir() {
80 return Err(HostlibError::InvalidParameter {
81 builtin: BUILTIN_ADD_READONLY_ROOTS,
82 param: "roots",
83 message: format!("path `{}` is not a directory", root.display()),
84 });
85 }
86 let (state, outcome) = IndexState::build_from_root(&root);
87 let files_indexed = outcome.files_indexed as i64;
88 total_files += state.files.len();
89 // Idempotent: replace any existing entry for the same canonical
90 // root rather than appending a duplicate.
91 let canonical = state.root.clone();
92 if let Some(slot) = guard.iter_mut().find(|s| s.root == canonical) {
93 *slot = state;
94 } else {
95 guard.push(state);
96 }
97 added.push(build_dict([
98 ("root", str_value(to_agent_path(&canonical))),
99 ("files_indexed", VmValue::Int(files_indexed)),
100 ]));
101 }
102
103 Ok(build_dict([
104 ("roots", VmValue::List(Arc::new(added))),
105 ("readonly_root_count", VmValue::Int(guard.len() as i64)),
106 ("readonly_files_indexed", VmValue::Int(total_files as i64)),
107 ]))
108}
109
110/// Resolve `rel_or_abs` against the primary index first, then fall back to
111/// every read-only secondary root. Returns the canonical absolute path of
112/// the first index that contains the file. Used by `read_range` so a symbol
113/// discovered in a dependency root can be read back.
114pub(super) fn resolve_read_path(
115 primary: &SharedIndex,
116 readonly: &ReadonlyRoots,
117 path: &str,
118) -> Option<PathBuf> {
119 // Resolution order, preserving the pre-#2403 contract:
120 // 1. An *existing* file inside the primary workspace root.
121 // 2. An *existing* file inside any read-only dependency root.
122 // 3. As a fallback, the primary root's resolution even if the file
123 // does not exist — so a missing in-workspace path still fails at
124 // the read step with "file not found" exactly as before, rather
125 // than being rejected as out-of-scope.
126 // `absolute_path` confines every candidate to a known root and accepts
127 // not-yet-existing paths (it is shared with write paths), so the
128 // existence filter is what lets a dependency-only path win over the
129 // phantom project path it would otherwise resolve to.
130 let (primary_resolved, primary_built) = {
131 let guard = primary.lock().expect("code_index mutex poisoned");
132 (
133 guard.as_ref().and_then(|state| state.absolute_path(path)),
134 guard.is_some(),
135 )
136 };
137 if let Some(abs) = primary_resolved.as_ref().filter(|p| p.exists()) {
138 return Some(abs.clone());
139 }
140 {
141 let guard = readonly.lock().expect("readonly roots mutex poisoned");
142 if let Some(abs) = guard
143 .iter()
144 .find_map(|state| state.absolute_path(path).filter(|p| p.exists()))
145 {
146 return Some(abs);
147 }
148 }
149 // When the primary index has never been built, there is no workspace
150 // root to confine `path` against, so `absolute_path` yields `None` and
151 // both lookups above miss. Pre-#3352 `read_range` handled this by
152 // reading the raw path straight off the filesystem (the old
153 // `None => PathBuf::from(&path)` arm). Restore that fallback so callers
154 // that read a path before any rebuild — e.g. `agent_run` scanning a
155 // process-output temp file to surface buried test-failure lines, and
156 // eval/verify reads over arbitrary shell output — still resolve instead
157 // of erroring "path must stay within the indexed workspace root". The
158 // read step still returns a genuine "file not found" for a missing
159 // path, so the contract is unchanged.
160 if !primary_built {
161 return Some(PathBuf::from(path));
162 }
163 primary_resolved
164}
165
166/// Run `query`-style scoring over every read-only secondary root, tagging
167/// each hit with its `root` so callers can disambiguate a dependency hit
168/// from a project hit. Returns hits already merged with whatever the
169/// primary index produced. Hits keep the same `path`/`score`/`match_count`
170/// shape as the primary query plus a `root` field; primary hits carry
171/// `root: nil`.
172pub(super) fn query_readonly_hits(
173 readonly: &ReadonlyRoots,
174 needle: &str,
175 case_sensitive: bool,
176) -> Vec<super::builtins::Hit> {
177 let guard = readonly.lock().expect("readonly roots mutex poisoned");
178 let mut hits: Vec<super::builtins::Hit> = Vec::new();
179 for state in guard.iter() {
180 super::builtins::collect_hits_into(state, needle, case_sensitive, &mut hits);
181 }
182 hits
183}