1use std::collections::{BTreeMap, HashSet};
10use std::path::PathBuf;
11use std::rc::Rc;
12use std::sync::{Arc, Mutex};
13use std::time::Instant;
14
15use harn_vm::VmValue;
16
17use super::file_table::FileId;
18use super::imports;
19use super::state::IndexState;
20use crate::error::HostlibError;
21use crate::tools::args::{
22 build_dict, dict_arg, optional_bool, optional_int, optional_string, require_string, str_value,
23};
24
25pub type SharedIndex = Arc<Mutex<Option<IndexState>>>;
30
31pub(super) const BUILTIN_QUERY: &str = "hostlib_code_index_query";
32pub(super) const BUILTIN_REBUILD: &str = "hostlib_code_index_rebuild";
33pub(super) const BUILTIN_STATS: &str = "hostlib_code_index_stats";
34pub(super) const BUILTIN_IMPORTS_FOR: &str = "hostlib_code_index_imports_for";
35pub(super) const BUILTIN_IMPORTERS_OF: &str = "hostlib_code_index_importers_of";
36
37pub(super) fn run_query(index: &SharedIndex, args: &[VmValue]) -> Result<VmValue, HostlibError> {
38 let raw = dict_arg(BUILTIN_QUERY, args)?;
39 let dict = raw.as_ref();
40 let needle = require_string(BUILTIN_QUERY, dict, "needle")?;
41 if needle.is_empty() {
42 return Err(HostlibError::InvalidParameter {
43 builtin: BUILTIN_QUERY,
44 param: "needle",
45 message: "must not be empty".to_string(),
46 });
47 }
48 let case_sensitive = optional_bool(BUILTIN_QUERY, dict, "case_sensitive", false)?;
49 let max_results = optional_int(BUILTIN_QUERY, dict, "max_results", 100)?;
50 if max_results < 1 {
51 return Err(HostlibError::InvalidParameter {
52 builtin: BUILTIN_QUERY,
53 param: "max_results",
54 message: "must be >= 1".to_string(),
55 });
56 }
57 let scope = optional_string_list(dict, "scope")?;
58
59 let guard = index.lock().expect("code_index mutex poisoned");
60 let Some(state) = guard.as_ref() else {
61 return Ok(empty_query_response());
62 };
63
64 let candidate_ids = candidates_for(state, &needle);
65 let mut hits: Vec<Hit> = Vec::new();
66 for id in candidate_ids {
67 let Some(file) = state.files.get(&id) else {
68 continue;
69 };
70 if !scope_allows(&scope, &file.relative_path) {
71 continue;
72 }
73 let Some(text) = read_file_text(&state.root, &file.relative_path) else {
74 continue;
75 };
76 let count = count_matches(&text, &needle, case_sensitive);
77 if count == 0 {
78 continue;
79 }
80 hits.push(Hit {
81 path: file.relative_path.clone(),
82 score: count as f64,
83 match_count: count,
84 });
85 }
86 hits.sort_by(|a, b| {
87 b.match_count
88 .cmp(&a.match_count)
89 .then_with(|| a.path.cmp(&b.path))
90 });
91 let max = max_results as usize;
92 let truncated = hits.len() > max;
93 if truncated {
94 hits.truncate(max);
95 }
96 Ok(build_dict([
97 (
98 "results",
99 VmValue::List(Rc::new(hits.into_iter().map(hit_to_value).collect())),
100 ),
101 ("truncated", VmValue::Bool(truncated)),
102 ]))
103}
104
105pub(super) fn run_rebuild(index: &SharedIndex, args: &[VmValue]) -> Result<VmValue, HostlibError> {
106 let raw = dict_arg(BUILTIN_REBUILD, args)?;
107 let dict = raw.as_ref();
108 let _force = optional_bool(BUILTIN_REBUILD, dict, "force", false)?;
109 let root = optional_string(BUILTIN_REBUILD, dict, "root")?
110 .map(PathBuf::from)
111 .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")));
112 if !root.exists() {
113 return Err(HostlibError::InvalidParameter {
114 builtin: BUILTIN_REBUILD,
115 param: "root",
116 message: format!("path `{}` does not exist", root.display()),
117 });
118 }
119 if !root.is_dir() {
120 return Err(HostlibError::InvalidParameter {
121 builtin: BUILTIN_REBUILD,
122 param: "root",
123 message: format!("path `{}` is not a directory", root.display()),
124 });
125 }
126 let started = Instant::now();
127 let (state, outcome) = IndexState::build_from_root(&root);
128 let elapsed_ms = started.elapsed().as_millis() as i64;
129 {
130 let mut guard = index.lock().expect("code_index mutex poisoned");
131 *guard = Some(state);
132 }
133 Ok(build_dict([
134 ("files_indexed", VmValue::Int(outcome.files_indexed as i64)),
135 ("files_skipped", VmValue::Int(outcome.files_skipped as i64)),
136 ("elapsed_ms", VmValue::Int(elapsed_ms)),
137 ]))
138}
139
140pub(super) fn run_stats(index: &SharedIndex, _args: &[VmValue]) -> Result<VmValue, HostlibError> {
141 let guard = index.lock().expect("code_index mutex poisoned");
142 let Some(state) = guard.as_ref() else {
143 return Ok(empty_stats_response());
144 };
145 Ok(build_dict([
146 ("indexed_files", VmValue::Int(state.files.len() as i64)),
147 (
148 "trigrams",
149 VmValue::Int(state.trigrams.distinct_trigrams() as i64),
150 ),
151 ("words", VmValue::Int(state.words.distinct_words() as i64)),
152 ("memory_bytes", VmValue::Int(state.estimated_bytes() as i64)),
153 (
154 "last_rebuild_unix_ms",
155 VmValue::Int(state.last_built_unix_ms),
156 ),
157 ]))
158}
159
160pub(super) fn run_imports_for(
161 index: &SharedIndex,
162 args: &[VmValue],
163) -> Result<VmValue, HostlibError> {
164 let raw = dict_arg(BUILTIN_IMPORTS_FOR, args)?;
165 let dict = raw.as_ref();
166 let path = require_string(BUILTIN_IMPORTS_FOR, dict, "path")?;
167 let guard = index.lock().expect("code_index mutex poisoned");
168 let Some(state) = guard.as_ref() else {
169 return Ok(empty_imports_response(&path));
170 };
171 let Some(file_id) = state.lookup_path(&path) else {
172 return Ok(empty_imports_response(&path));
173 };
174 let Some(file) = state.files.get(&file_id) else {
175 return Ok(empty_imports_response(&path));
176 };
177 let kind = imports::import_kind(&file.language).to_string();
178 let base_dir = imports::parent_dir(&file.relative_path);
179 let resolved_ids: HashSet<FileId> = state.deps.imports_of(file_id).into_iter().collect();
180 let mut entries: Vec<VmValue> = Vec::with_capacity(file.imports.len());
181 for raw_import in &file.imports {
182 let resolved_path =
183 imports::resolve_module(raw_import, &file.language, &base_dir, &state.path_to_id)
184 .filter(|id| resolved_ids.contains(id))
185 .and_then(|id| state.files.get(&id).map(|f| f.relative_path.clone()));
186 entries.push(import_entry(raw_import, resolved_path.as_deref(), &kind));
187 }
188 Ok(build_dict([
189 ("path", str_value(&file.relative_path)),
190 ("imports", VmValue::List(Rc::new(entries))),
191 ]))
192}
193
194pub(super) fn run_importers_of(
195 index: &SharedIndex,
196 args: &[VmValue],
197) -> Result<VmValue, HostlibError> {
198 let raw = dict_arg(BUILTIN_IMPORTERS_OF, args)?;
199 let dict = raw.as_ref();
200 let module = require_string(BUILTIN_IMPORTERS_OF, dict, "module")?;
201 let guard = index.lock().expect("code_index mutex poisoned");
202 let Some(state) = guard.as_ref() else {
203 return Ok(empty_importers_response(&module));
204 };
205
206 let target_id = state.lookup_path(&module).or_else(|| {
207 let needle = format!("/{module}");
211 state
212 .path_to_id
213 .iter()
214 .find(|(p, _)| p.ends_with(&needle) || *p == &module)
215 .map(|(_, id)| *id)
216 });
217
218 let mut importers: Vec<String> = match target_id {
219 Some(id) => state
220 .deps
221 .importers_of(id)
222 .into_iter()
223 .filter_map(|importer_id| {
224 state
225 .files
226 .get(&importer_id)
227 .map(|f| f.relative_path.clone())
228 })
229 .collect(),
230 None => Vec::new(),
231 };
232 importers.sort();
233 Ok(build_dict([
234 ("module", str_value(&module)),
235 (
236 "importers",
237 VmValue::List(Rc::new(importers.into_iter().map(str_value).collect())),
238 ),
239 ]))
240}
241
242fn candidates_for(state: &IndexState, needle: &str) -> Vec<FileId> {
243 if needle.len() >= 3 {
244 let trigrams = super::trigram::query_trigrams(needle);
245 return state.trigrams.query(&trigrams).into_iter().collect();
246 }
247 state.files.keys().copied().collect()
250}
251
/// Reads the file at `root/relative` as UTF-8 text.
///
/// Returns `None` on any read failure (missing file, permission error, invalid UTF-8); the
/// underlying error is discarded.
fn read_file_text(root: &std::path::Path, relative: &str) -> Option<String> {
    let full_path = root.join(relative);
    match std::fs::read_to_string(full_path) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}
255
/// Counts non-overlapping occurrences of `needle` in `haystack`.
///
/// When `case_sensitive` is false both strings are lowercased before counting.
fn count_matches(haystack: &str, needle: &str, case_sensitive: bool) -> u64 {
    if case_sensitive {
        return haystack.matches(needle).count() as u64;
    }
    let folded_haystack = haystack.to_lowercase();
    let folded_needle = needle.to_lowercase();
    let occurrences = folded_haystack.matches(folded_needle.as_str()).count();
    occurrences as u64
}
265
/// Reports whether `relative` falls inside the query scope.
///
/// An empty `scope` list, or any empty entry in it, allows every path. Otherwise a path is
/// allowed when it equals an entry exactly or lies beneath an entry treated as a directory
/// (`src` allows `src/lib.rs` but not `srcs/lib.rs`). One trailing `/` on an entry is ignored
/// for the directory test, so `src/` behaves like `src`.
fn scope_allows(scope: &[String], relative: &str) -> bool {
    if scope.is_empty() {
        return true;
    }
    scope.iter().any(|entry| {
        let entry = entry.as_str();
        if entry.is_empty() || relative == entry {
            return true;
        }
        // Without this, `src/` would require the prefix `src//` and never match anything.
        let dir = entry.strip_suffix('/').unwrap_or(entry);
        // Requiring a `/` right after the prefix respects path-component boundaries, and
        // `strip_prefix` avoids allocating a formatted prefix for every entry.
        matches!(relative.strip_prefix(dir), Some(rest) if rest.starts_with('/'))
    })
}
274
275fn optional_string_list(
276 dict: &BTreeMap<String, VmValue>,
277 key: &'static str,
278) -> Result<Vec<String>, HostlibError> {
279 match dict.get(key) {
280 None | Some(VmValue::Nil) => Ok(Vec::new()),
281 Some(VmValue::List(items)) => {
282 let mut out = Vec::with_capacity(items.len());
283 for item in items.iter() {
284 match item {
285 VmValue::String(s) => out.push(s.to_string()),
286 other => {
287 return Err(HostlibError::InvalidParameter {
288 builtin: BUILTIN_QUERY,
289 param: key,
290 message: format!(
291 "expected list of strings, got element {}",
292 other.type_name()
293 ),
294 });
295 }
296 }
297 }
298 Ok(out)
299 }
300 Some(other) => Err(HostlibError::InvalidParameter {
301 builtin: BUILTIN_QUERY,
302 param: key,
303 message: format!("expected list of strings, got {}", other.type_name()),
304 }),
305 }
306}
307
308struct Hit {
309 path: String,
310 score: f64,
311 match_count: u64,
312}
313
314fn hit_to_value(hit: Hit) -> VmValue {
315 let Hit {
316 path,
317 score,
318 match_count,
319 } = hit;
320 build_dict([
321 ("path", str_value(&path)),
322 ("score", VmValue::Float(score)),
323 ("match_count", VmValue::Int(match_count as i64)),
324 ])
325}
326
327fn import_entry(module: &str, resolved: Option<&str>, kind: &str) -> VmValue {
328 let mut map: BTreeMap<String, VmValue> = BTreeMap::new();
329 map.insert("module".into(), str_value(module));
330 map.insert(
331 "resolved_path".into(),
332 match resolved {
333 Some(p) => str_value(p),
334 None => VmValue::Nil,
335 },
336 );
337 map.insert("kind".into(), str_value(kind));
338 VmValue::Dict(Rc::new(map))
339}
340
341fn empty_query_response() -> VmValue {
342 build_dict([
343 ("results", VmValue::List(Rc::new(Vec::new()))),
344 ("truncated", VmValue::Bool(false)),
345 ])
346}
347
348fn empty_stats_response() -> VmValue {
349 build_dict([
350 ("indexed_files", VmValue::Int(0)),
351 ("trigrams", VmValue::Int(0)),
352 ("words", VmValue::Int(0)),
353 ("memory_bytes", VmValue::Int(0)),
354 ("last_rebuild_unix_ms", VmValue::Nil),
355 ])
356}
357
358fn empty_imports_response(path: &str) -> VmValue {
359 build_dict([
360 ("path", str_value(path)),
361 ("imports", VmValue::List(Rc::new(Vec::new()))),
362 ])
363}
364
365fn empty_importers_response(module: &str) -> VmValue {
366 build_dict([
367 ("module", str_value(module)),
368 ("importers", VmValue::List(Rc::new(Vec::new()))),
369 ])
370}