Skip to main content

harn_hostlib/code_index/
readonly.rs

//! Additive, read-only secondary roots for the code index (issue #2403
//! follow-up).
//!
//! The primary [`IndexState`] (built by `hostlib_code_index_rebuild`) owns
//! exactly one writable workspace root and flips its slot wholesale on
//! every rebuild. That is correct for the project under edit, but it means
//! a caller cannot also index *dependency* / SDK roots (e.g. the macOS
//! IOKit headers that declare `kIOPSTimeToFullChargeKey`) without
//! clobbering the project index.
//!
//! This module adds a parallel, **read-only** set of secondary
//! [`IndexState`]s that live beside the primary in the capability. They are
//! merged into path-based query results so library API symbols become
//! discoverable, while staying entirely out of the project's writable
//! scope:
//!
//! - They are never mutated by `version_record`, `reindex_file`,
//!   `agent_*`, `lock_*`, or `rename_symbol` — those builtins only ever
//!   touch the primary slot.
//! - `read_range` will resolve a path inside a read-only root (so a symbol
//!   discovered there can be read), but every *write* path (rename,
//!   reindex, version log, locks) still rejects out-of-project paths
//!   exactly as before, because those paths are not in the primary index.
//!
//! Concurrency mirrors the primary: a single `Arc<Mutex<Vec<IndexState>>>`
//! cell, so the same serialised view is shared by every Harn VM wired
//! against the capability.
28
29use std::path::PathBuf;
30use std::sync::{Arc, Mutex};
31
32use harn_vm::VmValue;
33
34use super::builtins::SharedIndex;
35use super::state::IndexState;
36use crate::error::HostlibError;
37use crate::tools::args::{
38    build_dict, dict_arg, optional_bool, optional_string_list, str_value, to_agent_path,
39};
40
41/// Shared cell holding the read-only secondary indexes. Each entry is a
42/// fully-built [`IndexState`] anchored at one dependency root. Empty until
43/// `hostlib_code_index_add_readonly_roots` is called.
44pub type ReadonlyRoots = Arc<Mutex<Vec<IndexState>>>;
45
46pub(super) const BUILTIN_ADD_READONLY_ROOTS: &str = "hostlib_code_index_add_readonly_roots";
47
48/// Build and merge one or more dependency roots into the read-only set.
49///
50/// Idempotent per root: re-adding a root that is already present rebuilds
51/// that entry in place rather than appending a duplicate. `replace: true`
52/// clears the existing set first (so a caller can swap the whole dependency
53/// fan-out without accumulating stale roots).
54pub(super) fn run_add_readonly_roots(
55    readonly: &ReadonlyRoots,
56    args: &[VmValue],
57) -> Result<VmValue, HostlibError> {
58    let raw = dict_arg(BUILTIN_ADD_READONLY_ROOTS, args)?;
59    let dict = raw.as_ref();
60    let roots = optional_string_list(BUILTIN_ADD_READONLY_ROOTS, dict, "roots")?;
61    let replace = optional_bool(BUILTIN_ADD_READONLY_ROOTS, dict, "replace", false)?;
62
63    let mut guard = readonly.lock().expect("readonly roots mutex poisoned");
64    if replace {
65        guard.clear();
66    }
67
68    let mut added: Vec<VmValue> = Vec::with_capacity(roots.len());
69    let mut total_files: usize = 0;
70    for raw_root in roots {
71        let root = PathBuf::from(&raw_root);
72        if !root.exists() {
73            return Err(HostlibError::InvalidParameter {
74                builtin: BUILTIN_ADD_READONLY_ROOTS,
75                param: "roots",
76                message: format!("path `{}` does not exist", root.display()),
77            });
78        }
79        if !root.is_dir() {
80            return Err(HostlibError::InvalidParameter {
81                builtin: BUILTIN_ADD_READONLY_ROOTS,
82                param: "roots",
83                message: format!("path `{}` is not a directory", root.display()),
84            });
85        }
86        let (state, outcome) = IndexState::build_from_root(&root);
87        let files_indexed = outcome.files_indexed as i64;
88        total_files += state.files.len();
89        // Idempotent: replace any existing entry for the same canonical
90        // root rather than appending a duplicate.
91        let canonical = state.root.clone();
92        if let Some(slot) = guard.iter_mut().find(|s| s.root == canonical) {
93            *slot = state;
94        } else {
95            guard.push(state);
96        }
97        added.push(build_dict([
98            ("root", str_value(to_agent_path(&canonical))),
99            ("files_indexed", VmValue::Int(files_indexed)),
100        ]));
101    }
102
103    Ok(build_dict([
104        ("roots", VmValue::List(Arc::new(added))),
105        ("readonly_root_count", VmValue::Int(guard.len() as i64)),
106        ("readonly_files_indexed", VmValue::Int(total_files as i64)),
107    ]))
108}
109
110/// Resolve `rel_or_abs` against the primary index first, then fall back to
111/// every read-only secondary root. Returns the canonical absolute path of
112/// the first index that contains the file. Used by `read_range` so a symbol
113/// discovered in a dependency root can be read back.
114pub(super) fn resolve_read_path(
115    primary: &SharedIndex,
116    readonly: &ReadonlyRoots,
117    path: &str,
118) -> Option<PathBuf> {
119    // Resolution order, preserving the pre-#2403 contract:
120    //   1. An *existing* file inside the primary workspace root.
121    //   2. An *existing* file inside any read-only dependency root.
122    //   3. As a fallback, the primary root's resolution even if the file
123    //      does not exist — so a missing in-workspace path still fails at
124    //      the read step with "file not found" exactly as before, rather
125    //      than being rejected as out-of-scope.
126    // `absolute_path` confines every candidate to a known root and accepts
127    // not-yet-existing paths (it is shared with write paths), so the
128    // existence filter is what lets a dependency-only path win over the
129    // phantom project path it would otherwise resolve to.
130    let (primary_resolved, primary_built) = {
131        let guard = primary.lock().expect("code_index mutex poisoned");
132        (
133            guard.as_ref().and_then(|state| state.absolute_path(path)),
134            guard.is_some(),
135        )
136    };
137    if let Some(abs) = primary_resolved.as_ref().filter(|p| p.exists()) {
138        return Some(abs.clone());
139    }
140    {
141        let guard = readonly.lock().expect("readonly roots mutex poisoned");
142        if let Some(abs) = guard
143            .iter()
144            .find_map(|state| state.absolute_path(path).filter(|p| p.exists()))
145        {
146            return Some(abs);
147        }
148    }
149    // When the primary index has never been built, there is no workspace
150    // root to confine `path` against, so `absolute_path` yields `None` and
151    // both lookups above miss. Pre-#3352 `read_range` handled this by
152    // reading the raw path straight off the filesystem (the old
153    // `None => PathBuf::from(&path)` arm). Restore that fallback so callers
154    // that read a path before any rebuild — e.g. `agent_run` scanning a
155    // process-output temp file to surface buried test-failure lines, and
156    // eval/verify reads over arbitrary shell output — still resolve instead
157    // of erroring "path must stay within the indexed workspace root". The
158    // read step still returns a genuine "file not found" for a missing
159    // path, so the contract is unchanged.
160    if !primary_built {
161        return Some(PathBuf::from(path));
162    }
163    primary_resolved
164}
165
166/// Run `query`-style scoring over every read-only secondary root, tagging
167/// each hit with its `root` so callers can disambiguate a dependency hit
168/// from a project hit. Returns hits already merged with whatever the
169/// primary index produced. Hits keep the same `path`/`score`/`match_count`
170/// shape as the primary query plus a `root` field; primary hits carry
171/// `root: nil`.
172pub(super) fn query_readonly_hits(
173    readonly: &ReadonlyRoots,
174    needle: &str,
175    case_sensitive: bool,
176) -> Vec<super::builtins::Hit> {
177    let guard = readonly.lock().expect("readonly roots mutex poisoned");
178    let mut hits: Vec<super::builtins::Hit> = Vec::new();
179    for state in guard.iter() {
180        super::builtins::collect_hits_into(state, needle, case_sensitive, &mut hits);
181    }
182    hits
183}