1use std::path::{Path, PathBuf};
17use std::process::Command;
18use std::sync::Arc;
19use std::time::{SystemTime, UNIX_EPOCH};
20
21use harn_vm::VmValue;
22
23use crate::error::HostlibError;
24use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
25use crate::tools::args::{
26 build_dict, dict_arg, optional_bool, optional_int, require_string, str_value,
27};
28
29mod commands;
30mod discover;
31mod extensions;
32mod folders;
33mod git;
34mod imports;
35mod manifest;
36mod result;
37mod scoring;
38mod snapshot;
39mod subproject;
40mod symbols;
41mod test_mapping;
42
/// Removes every inherited `GIT_*` environment variable from `cmd`.
///
/// Each variable of the current process whose name starts with `GIT_` is
/// marked for removal on `cmd`, so a child spawned from it does not inherit
/// that variable.
///
/// The environment is enumerated with [`std::env::vars_os`] instead of
/// [`std::env::vars`]: the latter panics as soon as *any* variable name or
/// value (not only `GIT_*` ones) is not valid Unicode.
fn strip_ambient_git_env(cmd: &mut Command) {
    for (key, _) in std::env::vars_os() {
        // A lossy conversion never alters the ASCII prefix being tested.
        if key.to_string_lossy().starts_with("GIT_") {
            cmd.env_remove(&key);
        }
    }
}
52
53pub use git::GitCapabilities;
54pub use result::{
55 DependencyEdge, FileRecord, FolderRecord, LanguageStat, ProjectMetadata, ScanDelta, ScanResult,
56 SubProject, SymbolKind, SymbolRecord,
57};
58
/// Registry name of the full project-scan builtin; also passed to the
/// argument helpers so their errors name the right builtin.
const SCAN_PROJECT_BUILTIN: &str = "hostlib_scanner_scan_project";
/// Registry name of the incremental-scan builtin; also passed to the
/// argument helpers so their errors name the right builtin.
const SCAN_INCREMENTAL_BUILTIN: &str = "hostlib_scanner_scan_incremental";
61
62#[derive(Default)]
64pub struct ScannerCapability;
65
66impl HostlibCapability for ScannerCapability {
67 fn module_name(&self) -> &'static str {
68 "scanner"
69 }
70
71 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
72 let scan_project: SyncHandler = Arc::new(scan_project_handler);
73 registry.register(RegisteredBuiltin {
74 name: SCAN_PROJECT_BUILTIN,
75 module: "scanner",
76 method: "scan_project",
77 handler: scan_project,
78 });
79 let scan_incremental: SyncHandler = Arc::new(scan_incremental_handler);
80 registry.register(RegisteredBuiltin {
81 name: SCAN_INCREMENTAL_BUILTIN,
82 module: "scanner",
83 method: "scan_incremental",
84 handler: scan_incremental,
85 });
86 }
87}
88
/// Options controlling file discovery and the derived project summary.
#[derive(Debug, Clone)]
pub struct ScanProjectOptions {
    /// Forwarded to file discovery as its `include_hidden` flag.
    pub include_hidden: bool,
    /// Forwarded to file discovery as its `respect_gitignore` flag.
    pub respect_gitignore: bool,
    /// Maximum number of discovered files to keep; `0` disables the cap.
    pub max_files: usize,
    /// When `true`, git churn scores are computed and applied to the files.
    pub include_git_history: bool,
    /// Token budget handed to the repo-map builder.
    pub repo_map_token_budget: usize,
}

impl Default for ScanProjectOptions {
    /// Defaults: hidden files excluded, `.gitignore` respected, no file cap,
    /// git history included, and a repo-map budget of 1200 tokens.
    fn default() -> Self {
        ScanProjectOptions {
            max_files: 0,
            repo_map_token_budget: 1200,
            include_hidden: false,
            respect_gitignore: true,
            include_git_history: true,
        }
    }
}
117
118pub fn scan_project(root: &Path, opts: ScanProjectOptions) -> ScanResult {
120 scan_project_with_git(root, opts, &git::CliGitCapabilities)
121}
122
123pub fn scan_project_with_git(
129 root: &Path,
130 opts: ScanProjectOptions,
131 git: &dyn GitCapabilities,
132) -> ScanResult {
133 let canonical = canonicalize(root);
134 let discover_opts = discover::DiscoverOptions {
135 include_hidden: opts.include_hidden,
136 respect_gitignore: opts.respect_gitignore,
137 };
138 let mut discovered = discover::discover_files(&canonical, discover_opts, git);
139 let truncated = if opts.max_files > 0 && discovered.len() > opts.max_files {
140 discovered.truncate(opts.max_files);
141 true
142 } else {
143 false
144 };
145
146 let (mut files, mut symbols, mut dependencies) = extract_per_file(&discovered);
147
148 scoring::compute_reference_counts(&mut symbols, &files);
149
150 if opts.include_git_history {
151 let churn = git.churn_scores(&canonical);
152 scoring::apply_churn(&mut files, &churn);
153 }
154 scoring::compute_importance_scores(&mut symbols, &files);
155
156 test_mapping::map_test_files(&mut files);
157
158 let folder_records = folders::build_folder_records(&files, &symbols);
159 let test_commands = commands::detect_test_commands(&canonical);
160 let code_patterns = commands::detect_code_patterns(&files, &canonical);
161 let mut project = folders::build_project_metadata(
162 &canonical,
163 &files,
164 test_commands,
165 code_patterns,
166 now_iso8601(),
167 );
168 let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
169 let mut sub_projects = subproject::detect_subprojects(&canonical, 2);
170 attach_manifest_dependencies(&canonical, &mut project, &mut sub_projects);
171
172 sort_for_output(&mut files, &mut symbols, &mut dependencies);
173
174 let token = snapshot::root_to_token(&canonical);
175 let result = ScanResult {
176 snapshot_token: token,
177 truncated,
178 project,
179 folders: folder_records,
180 files,
181 symbols,
182 dependencies,
183 sub_projects,
184 repo_map,
185 };
186 snapshot::save(&canonical, &result);
187 result
188}
189
190#[derive(Clone, Debug)]
192pub struct IncrementalScan {
193 pub result: ScanResult,
195 pub delta: ScanDelta,
197}
198
199pub fn scan_incremental(
203 token: &str,
204 explicit_changed: Option<&[String]>,
205 opts: ScanProjectOptions,
206) -> IncrementalScan {
207 scan_incremental_with_git(token, explicit_changed, opts, &git::CliGitCapabilities)
208}
209
210pub fn scan_incremental_with_git(
212 token: &str,
213 explicit_changed: Option<&[String]>,
214 opts: ScanProjectOptions,
215 git: &dyn GitCapabilities,
216) -> IncrementalScan {
217 let root = snapshot::token_to_root(token);
218 let canonical = canonicalize(&root);
219
220 let cached = snapshot::load(&canonical);
221 let cached = match cached {
222 Some(c) => c,
223 None => {
224 let result = scan_project_with_git(&canonical, opts, git);
225 return IncrementalScan {
226 result,
227 delta: ScanDelta {
228 full_rescan: true,
229 ..ScanDelta::default()
230 },
231 };
232 }
233 };
234
235 let discover_opts = discover::DiscoverOptions {
236 include_hidden: opts.include_hidden,
237 respect_gitignore: opts.respect_gitignore,
238 };
239 let mut current = discover::discover_files(&canonical, discover_opts, git);
240 if opts.max_files > 0 && current.len() > opts.max_files {
241 current.truncate(opts.max_files);
242 }
243
244 let delta = compute_delta(¤t, &cached, explicit_changed);
245 let total = current.len();
246 let needs_full_rescan =
247 total > 0 && (delta.added.len() + delta.modified.len()) * 10 > total * 3;
248
249 if needs_full_rescan {
250 let result = scan_project_with_git(&canonical, opts, git);
251 return IncrementalScan {
252 result,
253 delta: ScanDelta {
254 full_rescan: true,
255 ..delta
256 },
257 };
258 }
259
260 if delta.added.is_empty() && delta.modified.is_empty() && delta.removed.is_empty() {
261 return IncrementalScan {
262 result: cached,
263 delta,
264 };
265 }
266
267 let mut files = cached.files;
269 let mut symbols = cached.symbols;
270 let mut dependencies = cached.dependencies;
271
272 let removed_set: std::collections::HashSet<&str> =
273 delta.removed.iter().map(|s| s.as_str()).collect();
274 let touched_set: std::collections::HashSet<&str> = delta
275 .added
276 .iter()
277 .chain(delta.modified.iter())
278 .map(|s| s.as_str())
279 .collect();
280
281 files.retain(|f| !removed_set.contains(f.relative_path.as_str()));
282 symbols.retain(|s| {
283 !removed_set.contains(s.file_path.as_str()) && !touched_set.contains(s.file_path.as_str())
284 });
285 dependencies.retain(|d| {
286 !removed_set.contains(d.from_file.as_str()) && !touched_set.contains(d.from_file.as_str())
287 });
288
289 let touched_entries: Vec<discover::DiscoveredFile> = current
290 .iter()
291 .filter(|e| touched_set.contains(e.relative_path.as_str()))
292 .cloned()
293 .collect();
294 let (new_files, new_symbols, new_deps) = extract_per_file(&touched_entries);
295
296 let mut by_path: std::collections::BTreeMap<String, FileRecord> = files
297 .into_iter()
298 .map(|f| (f.relative_path.clone(), f))
299 .collect();
300 for new_file in new_files {
301 by_path.insert(new_file.relative_path.clone(), new_file);
302 }
303 let mut files: Vec<FileRecord> = by_path.into_values().collect();
304 symbols.extend(new_symbols);
305 dependencies.extend(new_deps);
306
307 scoring::compute_reference_counts(&mut symbols, &files);
308 if opts.include_git_history {
309 let churn = git.churn_scores(&canonical);
310 scoring::apply_churn(&mut files, &churn);
311 }
312 scoring::compute_importance_scores(&mut symbols, &files);
313 test_mapping::map_test_files(&mut files);
314
315 let folder_records = folders::build_folder_records(&files, &symbols);
316 let test_commands = commands::detect_test_commands(&canonical);
317 let code_patterns = commands::detect_code_patterns(&files, &canonical);
318 let mut project = folders::build_project_metadata(
319 &canonical,
320 &files,
321 test_commands,
322 code_patterns,
323 now_iso8601(),
324 );
325 let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
326 let mut sub_projects = subproject::detect_subprojects(&canonical, 2);
327 attach_manifest_dependencies(&canonical, &mut project, &mut sub_projects);
328
329 sort_for_output(&mut files, &mut symbols, &mut dependencies);
330
331 let token = snapshot::root_to_token(&canonical);
332 let result = ScanResult {
333 snapshot_token: token,
334 truncated: cached.truncated,
335 project,
336 folders: folder_records,
337 files,
338 symbols,
339 dependencies,
340 sub_projects,
341 repo_map,
342 };
343 snapshot::save(&canonical, &result);
344 IncrementalScan { result, delta }
345}
346
/// Resolves `root` to its canonical form, falling back to `root` unchanged
/// when canonicalization fails (for example because the path does not
/// exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
352
353fn attach_manifest_dependencies(
358 canonical: &Path,
359 project: &mut ProjectMetadata,
360 sub_projects: &mut [SubProject],
361) {
362 project.available_dependencies = manifest::directory_dependencies(canonical);
363 for sp in sub_projects.iter_mut() {
364 sp.dependencies = manifest::directory_dependencies(Path::new(&sp.path));
365 }
366}
367
368fn extract_per_file(
369 discovered: &[discover::DiscoveredFile],
370) -> (Vec<FileRecord>, Vec<SymbolRecord>, Vec<DependencyEdge>) {
371 let mut files: Vec<FileRecord> = Vec::with_capacity(discovered.len());
372 let mut symbols: Vec<SymbolRecord> = Vec::new();
373 let mut dependencies: Vec<DependencyEdge> = Vec::new();
374
375 for entry in discovered {
376 let metadata = std::fs::metadata(&entry.absolute_path);
377 let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
378 let modified = metadata
379 .as_ref()
380 .ok()
381 .and_then(|m| m.modified().ok())
382 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
383 .map(|d| d.as_millis() as i64)
384 .unwrap_or(0);
385
386 let content = std::fs::read_to_string(&entry.absolute_path).unwrap_or_default();
387 if content.is_empty() && size != 0 {
388 }
390 let language = extensions::file_extension(&entry.relative_path);
391 let imports = imports::extract_imports(&content, &language);
392 let file_symbols = symbols::extract_symbols(&content, &language, &entry.relative_path);
393 let line_count = crate::text::count_lines(content.as_bytes()) as usize;
394
395 for imp in &imports {
396 dependencies.push(DependencyEdge {
397 from_file: entry.relative_path.clone(),
398 to_module: imp.clone(),
399 });
400 }
401 symbols.extend(file_symbols);
402
403 files.push(FileRecord {
404 id: entry.relative_path.clone(),
405 relative_path: entry.relative_path.clone(),
406 file_name: extensions::file_name(&entry.relative_path).to_string(),
407 language,
408 line_count,
409 size_bytes: size,
410 last_modified_unix_ms: modified,
411 imports,
412 churn_score: 0.0,
413 corresponding_test_file: None,
414 });
415 }
416
417 (files, symbols, dependencies)
418}
419
420fn sort_for_output(
421 files: &mut [FileRecord],
422 symbols: &mut [SymbolRecord],
423 dependencies: &mut [DependencyEdge],
424) {
425 files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
426 symbols.sort_by(|a, b| a.id.cmp(&b.id));
427 dependencies.sort_by(|a, b| {
428 a.from_file
429 .cmp(&b.from_file)
430 .then_with(|| a.to_module.cmp(&b.to_module))
431 });
432}
433
434fn compute_delta(
435 current: &[discover::DiscoveredFile],
436 cached: &ScanResult,
437 explicit_changed: Option<&[String]>,
438) -> ScanDelta {
439 let cached_files: std::collections::BTreeMap<&str, &FileRecord> = cached
440 .files
441 .iter()
442 .map(|f| (f.relative_path.as_str(), f))
443 .collect();
444 let current_paths: std::collections::HashSet<&str> =
445 current.iter().map(|e| e.relative_path.as_str()).collect();
446
447 let added: Vec<String> = current
448 .iter()
449 .filter(|e| !cached_files.contains_key(e.relative_path.as_str()))
450 .map(|e| e.relative_path.clone())
451 .collect();
452 let removed: Vec<String> = cached
453 .files
454 .iter()
455 .filter(|f| !current_paths.contains(f.relative_path.as_str()))
456 .map(|f| f.relative_path.clone())
457 .collect();
458
459 let modified: Vec<String> = if let Some(explicit) = explicit_changed {
460 explicit
461 .iter()
462 .filter(|p| cached_files.contains_key(p.as_str()) && current_paths.contains(p.as_str()))
463 .cloned()
464 .collect()
465 } else {
466 let mut out = Vec::new();
467 for entry in current {
468 if let Some(prev) = cached_files.get(entry.relative_path.as_str()) {
469 let meta = std::fs::metadata(&entry.absolute_path).ok();
470 let mtime = meta
471 .as_ref()
472 .and_then(|m| m.modified().ok())
473 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
474 .map(|d| d.as_millis() as i64)
475 .unwrap_or(0);
476 let size = meta.as_ref().map(|m| m.len()).unwrap_or(prev.size_bytes);
477 if mtime > prev.last_modified_unix_ms || size != prev.size_bytes {
486 out.push(entry.relative_path.clone());
487 }
488 }
489 }
490 out
491 };
492
493 ScanDelta {
494 added,
495 modified,
496 removed,
497 full_rescan: false,
498 }
499}
500
501fn now_iso8601() -> String {
502 let now = SystemTime::now()
503 .duration_since(UNIX_EPOCH)
504 .unwrap_or_default();
505 let secs = now.as_secs() as i64;
506 let nanos = now.subsec_nanos();
507 let (year, month, day, hour, minute, second) = unix_to_civil(secs);
508 format!(
509 "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z",
510 millis = nanos / 1_000_000
511 )
512}
513
/// Converts seconds since the Unix epoch (UTC) into
/// `(year, month, day, hour, minute, second)` in the proleptic Gregorian
/// calendar. Negative inputs (dates before 1970) are supported.
///
/// The date part works on 400-year eras of 146 097 days, counted from
/// 0000-03-01 so that the leap day falls at the end of the shifted year.
fn unix_to_civil(secs: i64) -> (i64, u32, u32, u32, u32, u32) {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097;
    // Days between 0000-03-01 and 1970-01-01.
    const EPOCH_SHIFT_DAYS: i64 = 719_468;

    // Time of day; Euclidean division keeps it non-negative for secs < 0.
    let secs_of_day = secs.rem_euclid(SECS_PER_DAY);
    let hour = (secs_of_day / 3600) as u32;
    let minute = (secs_of_day / 60 % 60) as u32;
    let second = (secs_of_day % 60) as u32;

    // Calendar date.
    let shifted_days = secs.div_euclid(SECS_PER_DAY) + EPOCH_SHIFT_DAYS;
    let era = shifted_days.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted_days.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * march_based_month + 2) / 5 + 1) as u32;
    let month = if march_based_month < 10 {
        (march_based_month + 3) as u32
    } else {
        (march_based_month - 9) as u32
    };

    // January and February belong to the next civil year of the shifted one.
    let shifted_year = year_of_era as i64 + era * 400;
    let year = if month <= 2 {
        shifted_year + 1
    } else {
        shifted_year
    };

    (year, month, day, hour, minute, second)
}
537
538fn scan_project_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
541 let raw = dict_arg(SCAN_PROJECT_BUILTIN, args)?;
542 let dict = raw.as_ref();
543 let root = require_string(SCAN_PROJECT_BUILTIN, dict, "root")?;
544 let opts = parse_options(SCAN_PROJECT_BUILTIN, dict)?;
545 let result = scan_project(Path::new(&root), opts);
546 Ok(scan_result_to_value(&result, None))
547}
548
549fn scan_incremental_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
550 let raw = dict_arg(SCAN_INCREMENTAL_BUILTIN, args)?;
551 let dict = raw.as_ref();
552 let token = require_string(SCAN_INCREMENTAL_BUILTIN, dict, "snapshot_token")?;
553 let opts = parse_options(SCAN_INCREMENTAL_BUILTIN, dict)?;
554 let changed = parse_changed_paths(SCAN_INCREMENTAL_BUILTIN, dict)?;
555 let scan = scan_incremental(&token, changed.as_deref(), opts);
556 Ok(scan_result_to_value(&scan.result, Some(&scan.delta)))
557}
558
559fn parse_options(
560 builtin: &'static str,
561 dict: &harn_vm::value::DictMap,
562) -> Result<ScanProjectOptions, HostlibError> {
563 let include_hidden = optional_bool(builtin, dict, "include_hidden", false)?;
564 let respect_gitignore = optional_bool(builtin, dict, "respect_gitignore", true)?;
565 let max_files = optional_int(builtin, dict, "max_files", 0)?;
566 let include_git_history_default = builtin == SCAN_PROJECT_BUILTIN;
567 let include_git_history = optional_bool(
568 builtin,
569 dict,
570 "include_git_history",
571 include_git_history_default,
572 )?;
573 let repo_map_token_budget = optional_int(builtin, dict, "repo_map_token_budget", 1200)?;
574 if max_files < 0 {
575 return Err(HostlibError::InvalidParameter {
576 builtin,
577 param: "max_files",
578 message: "must be >= 0".to_string(),
579 });
580 }
581 if repo_map_token_budget < 0 {
582 return Err(HostlibError::InvalidParameter {
583 builtin,
584 param: "repo_map_token_budget",
585 message: "must be >= 0".to_string(),
586 });
587 }
588 Ok(ScanProjectOptions {
589 include_hidden,
590 respect_gitignore,
591 max_files: max_files as usize,
592 include_git_history,
593 repo_map_token_budget: repo_map_token_budget as usize,
594 })
595}
596
597fn parse_changed_paths(
598 builtin: &'static str,
599 dict: &harn_vm::value::DictMap,
600) -> Result<Option<Vec<String>>, HostlibError> {
601 let value = match dict.get("changed_paths") {
602 None | Some(VmValue::Nil) => return Ok(None),
603 Some(v) => v,
604 };
605 let list = match value {
606 VmValue::List(items) => items,
607 other => {
608 return Err(HostlibError::InvalidParameter {
609 builtin,
610 param: "changed_paths",
611 message: format!("expected list of strings, got {}", other.type_name()),
612 });
613 }
614 };
615 let mut out = Vec::with_capacity(list.len());
616 for item in list.iter() {
617 match item {
618 VmValue::String(s) => out.push(s.to_string()),
619 other => {
620 return Err(HostlibError::InvalidParameter {
621 builtin,
622 param: "changed_paths",
623 message: format!("non-string entry: {}", other.type_name()),
624 });
625 }
626 }
627 }
628 Ok(Some(out))
629}
630
631fn scan_result_to_value(result: &ScanResult, delta: Option<&ScanDelta>) -> VmValue {
632 let mut entries: Vec<(&'static str, VmValue)> = vec![
633 ("snapshot_token", str_value(&result.snapshot_token)),
634 ("truncated", VmValue::Bool(result.truncated)),
635 ("project", project_to_value(&result.project)),
636 ("folders", list_of(&result.folders, folder_to_value)),
637 ("files", list_of(&result.files, file_to_value)),
638 ("symbols", list_of(&result.symbols, symbol_to_value)),
639 (
640 "dependencies",
641 list_of(&result.dependencies, dependency_to_value),
642 ),
643 (
644 "sub_projects",
645 list_of(&result.sub_projects, subproject_to_value),
646 ),
647 ("repo_map", str_value(&result.repo_map)),
648 ];
649 if let Some(d) = delta {
650 entries.push(("delta", delta_to_value(d)));
651 }
652 build_dict(entries)
653}
654
655fn list_of<T>(items: &[T], to_value: fn(&T) -> VmValue) -> VmValue {
656 let list: Vec<VmValue> = items.iter().map(to_value).collect();
657 VmValue::List(Arc::new(list))
658}
659
660fn project_to_value(project: &ProjectMetadata) -> VmValue {
661 let test_commands_entries: Vec<(String, VmValue)> = project
662 .test_commands
663 .iter()
664 .map(|(k, v)| (k.clone(), str_value(v)))
665 .collect();
666 let test_commands_dict = build_dict(test_commands_entries);
667
668 let detected: VmValue = project
669 .detected_test_command
670 .as_deref()
671 .map(str_value)
672 .unwrap_or(VmValue::Nil);
673
674 let code_patterns: Vec<VmValue> = project.code_patterns.iter().map(str_value).collect();
675 let available_dependencies: Vec<VmValue> = project
676 .available_dependencies
677 .iter()
678 .map(str_value)
679 .collect();
680
681 build_dict([
682 ("name", str_value(&project.name)),
683 ("root_path", str_value(&project.root_path)),
684 ("languages", list_of(&project.languages, language_to_value)),
685 ("test_commands", test_commands_dict),
686 ("detected_test_command", detected),
687 ("code_patterns", VmValue::List(Arc::new(code_patterns))),
688 ("total_files", VmValue::Int(project.total_files as i64)),
689 ("total_lines", VmValue::Int(project.total_lines as i64)),
690 ("last_scanned_at", str_value(&project.last_scanned_at)),
691 (
692 "available_dependencies",
693 VmValue::List(Arc::new(available_dependencies)),
694 ),
695 ])
696}
697
698fn language_to_value(stat: &LanguageStat) -> VmValue {
699 build_dict([
700 ("name", str_value(&stat.name)),
701 ("file_count", VmValue::Int(stat.file_count as i64)),
702 ("line_count", VmValue::Int(stat.line_count as i64)),
703 ("percentage", VmValue::Float(stat.percentage)),
704 ])
705}
706
707fn folder_to_value(folder: &FolderRecord) -> VmValue {
708 let names: Vec<VmValue> = folder.key_symbol_names.iter().map(str_value).collect();
709 build_dict([
710 ("id", str_value(&folder.id)),
711 ("relative_path", str_value(&folder.relative_path)),
712 ("file_count", VmValue::Int(folder.file_count as i64)),
713 ("line_count", VmValue::Int(folder.line_count as i64)),
714 ("dominant_language", str_value(&folder.dominant_language)),
715 ("key_symbol_names", VmValue::List(Arc::new(names))),
716 ])
717}
718
719fn file_to_value(file: &FileRecord) -> VmValue {
720 let imports: Vec<VmValue> = file.imports.iter().map(str_value).collect();
721 let test_pair = file
722 .corresponding_test_file
723 .as_deref()
724 .map(str_value)
725 .unwrap_or(VmValue::Nil);
726 build_dict([
727 ("id", str_value(&file.id)),
728 ("relative_path", str_value(&file.relative_path)),
729 ("file_name", str_value(&file.file_name)),
730 ("language", str_value(&file.language)),
731 ("line_count", VmValue::Int(file.line_count as i64)),
732 ("size_bytes", VmValue::Int(file.size_bytes as i64)),
733 (
734 "last_modified_unix_ms",
735 VmValue::Int(file.last_modified_unix_ms),
736 ),
737 ("imports", VmValue::List(Arc::new(imports))),
738 ("churn_score", VmValue::Float(file.churn_score)),
739 ("corresponding_test_file", test_pair),
740 ])
741}
742
743fn symbol_to_value(symbol: &SymbolRecord) -> VmValue {
744 let container = symbol
745 .container
746 .as_deref()
747 .map(str_value)
748 .unwrap_or(VmValue::Nil);
749 build_dict([
750 ("id", str_value(&symbol.id)),
751 ("name", str_value(&symbol.name)),
752 ("kind", str_value(symbol.kind.keyword())),
753 ("file_path", str_value(&symbol.file_path)),
754 ("line", VmValue::Int(symbol.line as i64)),
755 ("signature", str_value(&symbol.signature)),
756 ("container", container),
757 (
758 "reference_count",
759 VmValue::Int(symbol.reference_count as i64),
760 ),
761 ("importance_score", VmValue::Float(symbol.importance_score)),
762 ])
763}
764
765fn dependency_to_value(dep: &DependencyEdge) -> VmValue {
766 build_dict([
767 ("from_file", str_value(&dep.from_file)),
768 ("to_module", str_value(&dep.to_module)),
769 ])
770}
771
772fn subproject_to_value(sp: &SubProject) -> VmValue {
773 let dependencies: Vec<VmValue> = sp.dependencies.iter().map(str_value).collect();
774 build_dict([
775 ("path", str_value(&sp.path)),
776 ("name", str_value(&sp.name)),
777 ("language", str_value(&sp.language)),
778 ("project_marker", str_value(&sp.project_marker)),
779 ("dependencies", VmValue::List(Arc::new(dependencies))),
780 ])
781}
782
783fn delta_to_value(delta: &ScanDelta) -> VmValue {
784 let added: Vec<VmValue> = delta.added.iter().map(str_value).collect();
785 let modified: Vec<VmValue> = delta.modified.iter().map(str_value).collect();
786 let removed: Vec<VmValue> = delta.removed.iter().map(str_value).collect();
787 build_dict([
788 ("added", VmValue::List(Arc::new(added))),
789 ("modified", VmValue::List(Arc::new(modified))),
790 ("removed", VmValue::List(Arc::new(removed))),
791 ("full_rescan", VmValue::Bool(delta.full_rescan)),
792 ])
793}
794
#[cfg(test)]
mod tests {
    use super::*;
    use filetime::{set_file_mtime, FileTime};
    use std::fs;

    /// Creates a temporary project containing `src/lib.rs` with `contents`.
    ///
    /// Returns the temp-dir guard (keep it alive for the whole test), the
    /// path of the seeded file and the token built from the canonical root.
    fn seed_project(contents: &str) -> (tempfile::TempDir, std::path::PathBuf, String) {
        let dir = tempfile::tempdir().unwrap();
        fs::create_dir_all(dir.path().join("src")).unwrap();
        let file = dir.path().join("src/lib.rs");
        fs::write(&file, contents).unwrap();

        let canonical = std::fs::canonicalize(dir.path()).unwrap();
        let token = canonical.to_string_lossy().to_string();
        (dir, file, token)
    }

    /// Names of all symbols in the scan result.
    fn symbol_names(scan: &IncrementalScan) -> Vec<String> {
        scan.result.symbols.iter().map(|s| s.name.clone()).collect()
    }

    /// Modification time (Unix ms) the scan recorded for `src/lib.rs`.
    fn cached_lib_mtime(scan: &IncrementalScan) -> i64 {
        scan.result
            .files
            .iter()
            .find(|r| r.relative_path == "src/lib.rs")
            .expect("seed file indexed")
            .last_modified_unix_ms
    }

    /// Forces the mtime of `file` back to `unix_ms`.
    fn pin_mtime(file: &std::path::Path, unix_ms: i64) {
        let secs = unix_ms / 1000;
        let nanos = ((unix_ms % 1000) * 1_000_000) as u32;
        set_file_mtime(file, FileTime::from_unix_time(secs, nanos)).unwrap();
    }

    #[test]
    fn builtin_option_defaults_match_request_schemas() {
        let dict = harn_vm::value::DictMap::new();

        let scan_project = parse_options(SCAN_PROJECT_BUILTIN, &dict).unwrap();
        let scan_incremental = parse_options(SCAN_INCREMENTAL_BUILTIN, &dict).unwrap();

        assert!(scan_project.include_git_history);
        assert!(!scan_incremental.include_git_history);
    }

    /// An edit that keeps the mtime but changes the size must be picked up.
    #[test]
    fn scan_incremental_detects_same_mtime_size_changing_edit() {
        let (_dir, file, token) = seed_project("pub fn old_symbol() {}\n");
        let opts = ScanProjectOptions::default();

        let first = scan_incremental(&token, None, opts.clone());
        let cached_mtime = cached_lib_mtime(&first);
        assert!(symbol_names(&first).iter().any(|n| n == "old_symbol"));

        // Grow the file, then restore the mtime the first scan recorded.
        fs::write(
            &file,
            "pub fn old_symbol() {}\npub fn brand_new_symbol() {}\n",
        )
        .unwrap();
        pin_mtime(&file, cached_mtime);

        let second = scan_incremental(&token, None, opts);
        let names = symbol_names(&second);
        assert!(
            names.iter().any(|n| n == "brand_new_symbol"),
            "same-mtime size-changing edit must be reindexed, got {names:?} (delta.modified={:?})",
            second.delta.modified,
        );
    }

    /// Explicit `changed_paths` must reindex even when mtime and size are
    /// both unchanged, which the metadata heuristic cannot see.
    #[test]
    fn scan_incremental_changed_paths_bypasses_metadata_heuristics() {
        let (_dir, file, token) = seed_project("pub fn alpha_name() {}\n");
        let opts = ScanProjectOptions::default();

        let first = scan_incremental(&token, None, opts.clone());
        let cached_mtime = cached_lib_mtime(&first);
        assert!(symbol_names(&first).iter().any(|n| n == "alpha_name"));

        // Same byte length as the seed content, and the same mtime.
        fs::write(&file, "pub fn omega_name() {}\n").unwrap();
        pin_mtime(&file, cached_mtime);

        let heuristic_only = scan_incremental(&token, None, opts.clone());
        assert!(
            !heuristic_only
                .delta
                .modified
                .contains(&"src/lib.rs".to_string()),
            "documenting the heuristic's known blind spot",
        );

        let explicit = scan_incremental(&token, Some(&["src/lib.rs".to_string()]), opts);
        assert!(
            symbol_names(&explicit).iter().any(|n| n == "omega_name"),
            "explicit changed_paths must always reindex, got {:?}",
            symbol_names(&explicit),
        );
    }
}