Skip to main content

harn_hostlib/code_index/
builtins.rs

1//! Host-builtin handlers for the `code_index` module.
2//!
3//! Each handler shape mirrors the schema in
4//! `schemas/code_index/<method>.{request,response}.json`. A single shared
5//! [`SharedIndex`] cell is captured by the closure of every handler so all
6//! five builtins observe the same in-memory state — `rebuild` writes,
7//! everything else reads.
8
9use std::collections::{BTreeMap, HashSet};
10use std::path::PathBuf;
11use std::rc::Rc;
12use std::sync::{Arc, Mutex};
13use std::time::Instant;
14
15use harn_vm::VmValue;
16
17use super::file_table::FileId;
18use super::imports;
19use super::state::IndexState;
20use crate::error::HostlibError;
21use crate::tools::args::{
22    build_dict, dict_arg, optional_bool, optional_int, optional_string, require_string, str_value,
23};
24
/// Shared, mutable cell carrying the (at most one) live workspace index.
///
/// The slot is `None` when no index is available; the read handlers in this
/// module answer with their empty responses in that case, and `run_rebuild`
/// replaces the slot with a freshly built `Some(state)`.
///
/// `Mutex` rather than `RwLock` because rebuilds flip the slot wholesale —
/// fine-grained concurrency between rebuild + reads is intentionally not
/// supported (the Swift side serialised through a single actor too).
pub type SharedIndex = Arc<Mutex<Option<IndexState>>>;
30
/// Builtin name handed to the argument helpers and reported in errors by `run_query`.
pub(super) const BUILTIN_QUERY: &str = "hostlib_code_index_query";
/// Builtin name handed to the argument helpers and reported in errors by `run_rebuild`.
pub(super) const BUILTIN_REBUILD: &str = "hostlib_code_index_rebuild";
/// Builtin name for the stats handler (`run_stats` itself reads no arguments).
pub(super) const BUILTIN_STATS: &str = "hostlib_code_index_stats";
/// Builtin name handed to the argument helpers by `run_imports_for`.
pub(super) const BUILTIN_IMPORTS_FOR: &str = "hostlib_code_index_imports_for";
/// Builtin name handed to the argument helpers by `run_importers_of`.
pub(super) const BUILTIN_IMPORTERS_OF: &str = "hostlib_code_index_importers_of";
36
37pub(super) fn run_query(index: &SharedIndex, args: &[VmValue]) -> Result<VmValue, HostlibError> {
38    let raw = dict_arg(BUILTIN_QUERY, args)?;
39    let dict = raw.as_ref();
40    let needle = require_string(BUILTIN_QUERY, dict, "needle")?;
41    if needle.is_empty() {
42        return Err(HostlibError::InvalidParameter {
43            builtin: BUILTIN_QUERY,
44            param: "needle",
45            message: "must not be empty".to_string(),
46        });
47    }
48    let case_sensitive = optional_bool(BUILTIN_QUERY, dict, "case_sensitive", false)?;
49    let max_results = optional_int(BUILTIN_QUERY, dict, "max_results", 100)?;
50    if max_results < 1 {
51        return Err(HostlibError::InvalidParameter {
52            builtin: BUILTIN_QUERY,
53            param: "max_results",
54            message: "must be >= 1".to_string(),
55        });
56    }
57    let scope = optional_string_list(dict, "scope")?;
58
59    let guard = index.lock().expect("code_index mutex poisoned");
60    let Some(state) = guard.as_ref() else {
61        return Ok(empty_query_response());
62    };
63
64    let candidate_ids = candidates_for(state, &needle);
65    let mut hits: Vec<Hit> = Vec::new();
66    for id in candidate_ids {
67        let Some(file) = state.files.get(&id) else {
68            continue;
69        };
70        if !scope_allows(&scope, &file.relative_path) {
71            continue;
72        }
73        let Some(text) = read_file_text(&state.root, &file.relative_path) else {
74            continue;
75        };
76        let count = count_matches(&text, &needle, case_sensitive);
77        if count == 0 {
78            continue;
79        }
80        hits.push(Hit {
81            path: file.relative_path.clone(),
82            score: count as f64,
83            match_count: count,
84        });
85    }
86    hits.sort_by(|a, b| {
87        b.match_count
88            .cmp(&a.match_count)
89            .then_with(|| a.path.cmp(&b.path))
90    });
91    let max = max_results as usize;
92    let truncated = hits.len() > max;
93    if truncated {
94        hits.truncate(max);
95    }
96    Ok(build_dict([
97        (
98            "results",
99            VmValue::List(Rc::new(hits.into_iter().map(hit_to_value).collect())),
100        ),
101        ("truncated", VmValue::Bool(truncated)),
102    ]))
103}
104
105pub(super) fn run_rebuild(index: &SharedIndex, args: &[VmValue]) -> Result<VmValue, HostlibError> {
106    let raw = dict_arg(BUILTIN_REBUILD, args)?;
107    let dict = raw.as_ref();
108    let _force = optional_bool(BUILTIN_REBUILD, dict, "force", false)?;
109    let root = optional_string(BUILTIN_REBUILD, dict, "root")?
110        .map(PathBuf::from)
111        .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
112    if !root.exists() {
113        return Err(HostlibError::InvalidParameter {
114            builtin: BUILTIN_REBUILD,
115            param: "root",
116            message: format!("path `{}` does not exist", root.display()),
117        });
118    }
119    if !root.is_dir() {
120        return Err(HostlibError::InvalidParameter {
121            builtin: BUILTIN_REBUILD,
122            param: "root",
123            message: format!("path `{}` is not a directory", root.display()),
124        });
125    }
126    let started = Instant::now();
127    let (state, outcome) = IndexState::build_from_root(&root);
128    let elapsed_ms = started.elapsed().as_millis() as i64;
129    {
130        let mut guard = index.lock().expect("code_index mutex poisoned");
131        *guard = Some(state);
132    }
133    Ok(build_dict([
134        ("files_indexed", VmValue::Int(outcome.files_indexed as i64)),
135        ("files_skipped", VmValue::Int(outcome.files_skipped as i64)),
136        ("elapsed_ms", VmValue::Int(elapsed_ms)),
137    ]))
138}
139
140pub(super) fn run_stats(index: &SharedIndex, _args: &[VmValue]) -> Result<VmValue, HostlibError> {
141    let guard = index.lock().expect("code_index mutex poisoned");
142    let Some(state) = guard.as_ref() else {
143        return Ok(empty_stats_response());
144    };
145    Ok(build_dict([
146        ("indexed_files", VmValue::Int(state.files.len() as i64)),
147        (
148            "trigrams",
149            VmValue::Int(state.trigrams.distinct_trigrams() as i64),
150        ),
151        ("words", VmValue::Int(state.words.distinct_words() as i64)),
152        ("memory_bytes", VmValue::Int(state.estimated_bytes() as i64)),
153        (
154            "last_rebuild_unix_ms",
155            VmValue::Int(state.last_built_unix_ms),
156        ),
157    ]))
158}
159
160pub(super) fn run_imports_for(
161    index: &SharedIndex,
162    args: &[VmValue],
163) -> Result<VmValue, HostlibError> {
164    let raw = dict_arg(BUILTIN_IMPORTS_FOR, args)?;
165    let dict = raw.as_ref();
166    let path = require_string(BUILTIN_IMPORTS_FOR, dict, "path")?;
167    let guard = index.lock().expect("code_index mutex poisoned");
168    let Some(state) = guard.as_ref() else {
169        return Ok(empty_imports_response(&path));
170    };
171    let Some(file_id) = state.lookup_path(&path) else {
172        return Ok(empty_imports_response(&path));
173    };
174    let Some(file) = state.files.get(&file_id) else {
175        return Ok(empty_imports_response(&path));
176    };
177    let kind = imports::import_kind(&file.language).to_string();
178    let base_dir = imports::parent_dir(&file.relative_path);
179    let resolved_ids: HashSet<FileId> = state.deps.imports_of(file_id).into_iter().collect();
180    let mut entries: Vec<VmValue> = Vec::with_capacity(file.imports.len());
181    for raw_import in &file.imports {
182        let resolved_path =
183            imports::resolve_module(raw_import, &file.language, &base_dir, &state.path_to_id)
184                .filter(|id| resolved_ids.contains(id))
185                .and_then(|id| state.files.get(&id).map(|f| f.relative_path.clone()));
186        entries.push(import_entry(raw_import, resolved_path.as_deref(), &kind));
187    }
188    Ok(build_dict([
189        ("path", str_value(&file.relative_path)),
190        ("imports", VmValue::List(Rc::new(entries))),
191    ]))
192}
193
194pub(super) fn run_importers_of(
195    index: &SharedIndex,
196    args: &[VmValue],
197) -> Result<VmValue, HostlibError> {
198    let raw = dict_arg(BUILTIN_IMPORTERS_OF, args)?;
199    let dict = raw.as_ref();
200    let module = require_string(BUILTIN_IMPORTERS_OF, dict, "module")?;
201    let guard = index.lock().expect("code_index mutex poisoned");
202    let Some(state) = guard.as_ref() else {
203        return Ok(empty_importers_response(&module));
204    };
205
206    let target_id = state.lookup_path(&module).or_else(|| {
207        // Fallback: suffix-match on relative paths so callers can request
208        // by basename (matching the `allowSuffixMatch` convention used by
209        // the resolver itself).
210        let needle = format!("/{module}");
211        state
212            .path_to_id
213            .iter()
214            .find(|(p, _)| p.ends_with(&needle) || *p == &module)
215            .map(|(_, id)| *id)
216    });
217
218    let mut importers: Vec<String> = match target_id {
219        Some(id) => state
220            .deps
221            .importers_of(id)
222            .into_iter()
223            .filter_map(|importer_id| {
224                state
225                    .files
226                    .get(&importer_id)
227                    .map(|f| f.relative_path.clone())
228            })
229            .collect(),
230        None => Vec::new(),
231    };
232    importers.sort();
233    Ok(build_dict([
234        ("module", str_value(&module)),
235        (
236            "importers",
237            VmValue::List(Rc::new(importers.into_iter().map(str_value).collect())),
238        ),
239    ]))
240}
241
242fn candidates_for(state: &IndexState, needle: &str) -> Vec<FileId> {
243    if needle.len() >= 3 {
244        let trigrams = super::trigram::query_trigrams(needle);
245        return state.trigrams.query(&trigrams).into_iter().collect();
246    }
247    // Sub-3-byte needles are below the trigram floor — fall back to
248    // scanning the whole file table (rare interactive case).
249    state.files.keys().copied().collect()
250}
251
/// Reads `relative` (joined onto `root`) as a UTF-8 string.
///
/// Returns `None` on any read failure — a missing file, a permission error or
/// content that is not valid UTF-8 — so callers can simply skip the file.
fn read_file_text(root: &std::path::Path, relative: &str) -> Option<String> {
    let absolute = root.join(relative);
    match std::fs::read_to_string(absolute) {
        Ok(contents) => Some(contents),
        Err(_) => None,
    }
}
255
/// Counts non-overlapping occurrences of `needle` in `haystack`.
///
/// With `case_sensitive` set the comparison is exact; otherwise both strings
/// are lowercased before counting.
fn count_matches(haystack: &str, needle: &str, case_sensitive: bool) -> u64 {
    let occurrences = if case_sensitive {
        haystack.matches(needle).count()
    } else {
        let folded_needle = needle.to_lowercase();
        haystack
            .to_lowercase()
            .matches(folded_needle.as_str())
            .count()
    };
    occurrences as u64
}
265
/// Decides whether `relative` falls inside the requested `scope`.
///
/// An empty scope list, or any empty scope entry, allows every path.
/// Otherwise a path is allowed when it equals a scope entry or lies beneath
/// it as a directory prefix (`"src"` allows `"src/main.rs"` but not
/// `"srcs/main.rs"`). Trailing slashes on an entry are ignored, so `"src/"`
/// behaves like `"src"`.
fn scope_allows(scope: &[String], relative: &str) -> bool {
    if scope.is_empty() {
        return true;
    }
    scope.iter().any(|entry| {
        if entry.is_empty() {
            return true;
        }
        // Normalise `"dir/"` to `"dir"` so directory-style entries match.
        let prefix = entry.trim_end_matches('/');
        // `strip_prefix` avoids building a temporary `"{entry}/"` string; the
        // remainder must start at a path separator to be a real descendant.
        relative == prefix
            || matches!(relative.strip_prefix(prefix), Some(rest) if rest.starts_with('/'))
    })
}
274
275fn optional_string_list(
276    dict: &BTreeMap<String, VmValue>,
277    key: &'static str,
278) -> Result<Vec<String>, HostlibError> {
279    match dict.get(key) {
280        None | Some(VmValue::Nil) => Ok(Vec::new()),
281        Some(VmValue::List(items)) => {
282            let mut out = Vec::with_capacity(items.len());
283            for item in items.iter() {
284                match item {
285                    VmValue::String(s) => out.push(s.to_string()),
286                    other => {
287                        return Err(HostlibError::InvalidParameter {
288                            builtin: BUILTIN_QUERY,
289                            param: key,
290                            message: format!(
291                                "expected list of strings, got element {}",
292                                other.type_name()
293                            ),
294                        });
295                    }
296                }
297            }
298            Ok(out)
299        }
300        Some(other) => Err(HostlibError::InvalidParameter {
301            builtin: BUILTIN_QUERY,
302            param: key,
303            message: format!("expected list of strings, got {}", other.type_name()),
304        }),
305    }
306}
307
/// One file that matched a query, before conversion into a `VmValue` dict.
struct Hit {
    /// Path of the matching file, relative to the index root.
    path: String,
    /// Ranking score; `run_query` sets this to the match count as a float.
    score: f64,
    /// Number of occurrences of the needle counted in the file.
    match_count: u64,
}
313
314fn hit_to_value(hit: Hit) -> VmValue {
315    let Hit {
316        path,
317        score,
318        match_count,
319    } = hit;
320    build_dict([
321        ("path", str_value(&path)),
322        ("score", VmValue::Float(score)),
323        ("match_count", VmValue::Int(match_count as i64)),
324    ])
325}
326
327fn import_entry(module: &str, resolved: Option<&str>, kind: &str) -> VmValue {
328    let mut map: BTreeMap<String, VmValue> = BTreeMap::new();
329    map.insert("module".into(), str_value(module));
330    map.insert(
331        "resolved_path".into(),
332        match resolved {
333            Some(p) => str_value(p),
334            None => VmValue::Nil,
335        },
336    );
337    map.insert("kind".into(), str_value(kind));
338    VmValue::Dict(Rc::new(map))
339}
340
341fn empty_query_response() -> VmValue {
342    build_dict([
343        ("results", VmValue::List(Rc::new(Vec::new()))),
344        ("truncated", VmValue::Bool(false)),
345    ])
346}
347
348fn empty_stats_response() -> VmValue {
349    build_dict([
350        ("indexed_files", VmValue::Int(0)),
351        ("trigrams", VmValue::Int(0)),
352        ("words", VmValue::Int(0)),
353        ("memory_bytes", VmValue::Int(0)),
354        ("last_rebuild_unix_ms", VmValue::Nil),
355    ])
356}
357
358fn empty_imports_response(path: &str) -> VmValue {
359    build_dict([
360        ("path", str_value(path)),
361        ("imports", VmValue::List(Rc::new(Vec::new()))),
362    ])
363}
364
365fn empty_importers_response(module: &str) -> VmValue {
366    build_dict([
367        ("module", str_value(module)),
368        ("importers", VmValue::List(Rc::new(Vec::new()))),
369    ])
370}