1use std::path::{Path, PathBuf};
17use std::process::Command;
18use std::sync::Arc;
19use std::time::{SystemTime, UNIX_EPOCH};
20
21use harn_vm::VmValue;
22
23use crate::error::HostlibError;
24use crate::registry::{BuiltinRegistry, HostlibCapability};
25use crate::tools::args::{
26 build_dict, dict_arg, optional_bool, optional_int, require_string, str_value,
27};
28
29mod commands;
30mod discover;
31mod extensions;
32mod folders;
33mod git;
34mod imports;
35mod manifest;
36mod result;
37mod scoring;
38mod snapshot;
39mod subproject;
40mod symbols;
41mod test_mapping;
42
/// Unsets every ambient `GIT_*` environment variable on `cmd`.
///
/// The names are taken from the current process environment; each variable
/// whose name starts with `GIT_` is removed from the child's environment via
/// [`Command::env_remove`]. Other variables are left untouched.
///
/// The environment is enumerated with [`std::env::vars_os`] rather than
/// [`std::env::vars`]: the latter panics as soon as *any* variable name or
/// value is not valid Unicode, which would let an unrelated variable crash the
/// caller.
fn strip_ambient_git_env(cmd: &mut Command) {
    for (key, _) in std::env::vars_os() {
        // The lossy conversion is only used for the prefix test; the original
        // `OsString` key is what gets removed, so no bytes are altered.
        if key.to_string_lossy().starts_with("GIT_") {
            cmd.env_remove(&key);
        }
    }
}
52
53pub use git::GitCapabilities;
54pub use result::{
55 DependencyEdge, FileRecord, FolderRecord, LanguageStat, ProjectMetadata, ScanDelta, ScanResult,
56 SubProject, SymbolKind, SymbolRecord,
57};
58
/// Builtin name under which the full-scan handler is registered; also passed
/// to the argument helpers so errors are attributed to this builtin.
const SCAN_PROJECT_BUILTIN: &str = "hostlib_scanner_scan_project";
/// Builtin name under which the incremental-scan handler is registered; also
/// passed to the argument helpers so errors are attributed to this builtin.
const SCAN_INCREMENTAL_BUILTIN: &str = "hostlib_scanner_scan_incremental";
61
62#[derive(Default)]
64pub struct ScannerCapability;
65
66impl HostlibCapability for ScannerCapability {
67 fn module_name(&self) -> &'static str {
68 "scanner"
69 }
70
71 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
72 registry.register_fn(
73 "scanner",
74 SCAN_PROJECT_BUILTIN,
75 "scan_project",
76 scan_project_handler,
77 );
78 registry.register_fn(
79 "scanner",
80 SCAN_INCREMENTAL_BUILTIN,
81 "scan_incremental",
82 scan_incremental_handler,
83 );
84 }
85}
86
/// Knobs that control how a project scan is performed.
#[derive(Clone, Debug)]
pub struct ScanProjectOptions {
    /// Forwarded to file discovery as `DiscoverOptions::include_hidden`.
    pub include_hidden: bool,
    /// Forwarded to file discovery as `DiscoverOptions::respect_gitignore`.
    pub respect_gitignore: bool,
    /// Maximum number of discovered files to keep; `0` disables the cap.
    pub max_files: usize,
    /// When `true`, git churn scores are fetched and applied to the files.
    pub include_git_history: bool,
    /// Budget handed to the repo-map builder.
    pub repo_map_token_budget: usize,
}

impl Default for ScanProjectOptions {
    /// Defaults: hidden files excluded, `.gitignore` respected, no file cap,
    /// git history included, repo-map budget of 1200.
    fn default() -> Self {
        Self {
            include_hidden: false,
            respect_gitignore: true,
            max_files: 0,
            include_git_history: true,
            repo_map_token_budget: 1200,
        }
    }
}
115
116pub fn scan_project(root: &Path, opts: ScanProjectOptions) -> ScanResult {
118 scan_project_with_git(root, opts, &git::CliGitCapabilities)
119}
120
121pub fn scan_project_with_git(
127 root: &Path,
128 opts: ScanProjectOptions,
129 git: &dyn GitCapabilities,
130) -> ScanResult {
131 let canonical = canonicalize(root);
132 let discover_opts = discover::DiscoverOptions {
133 include_hidden: opts.include_hidden,
134 respect_gitignore: opts.respect_gitignore,
135 };
136 let mut discovered = discover::discover_files(&canonical, discover_opts, git);
137 let truncated = if opts.max_files > 0 && discovered.len() > opts.max_files {
138 discovered.truncate(opts.max_files);
139 true
140 } else {
141 false
142 };
143
144 let (mut files, mut symbols, mut dependencies) = extract_per_file(&discovered);
145
146 scoring::compute_reference_counts(&mut symbols, &files);
147
148 if opts.include_git_history {
149 let churn = git.churn_scores(&canonical);
150 scoring::apply_churn(&mut files, &churn);
151 }
152 scoring::compute_importance_scores(&mut symbols, &files);
153
154 test_mapping::map_test_files(&mut files);
155
156 let folder_records = folders::build_folder_records(&files, &symbols);
157 let test_commands = commands::detect_test_commands(&canonical);
158 let code_patterns = commands::detect_code_patterns(&files, &canonical);
159 let mut project = folders::build_project_metadata(
160 &canonical,
161 &files,
162 test_commands,
163 code_patterns,
164 now_iso8601(),
165 );
166 let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
167 let mut sub_projects = subproject::detect_subprojects(&canonical, 2);
168 attach_manifest_dependencies(&canonical, &mut project, &mut sub_projects);
169
170 sort_for_output(&mut files, &mut symbols, &mut dependencies);
171
172 let token = snapshot::root_to_token(&canonical);
173 let result = ScanResult {
174 snapshot_token: token,
175 truncated,
176 project,
177 folders: folder_records,
178 files,
179 symbols,
180 dependencies,
181 sub_projects,
182 repo_map,
183 };
184 snapshot::save(&canonical, &result);
185 result
186}
187
/// Outcome of an incremental scan: the scan result together with the delta
/// that was detected against the cached snapshot.
#[derive(Clone, Debug)]
pub struct IncrementalScan {
    /// Scan result after the incremental update (or after a full rescan).
    pub result: ScanResult,
    /// Added / modified / removed paths, plus whether a full rescan was run.
    pub delta: ScanDelta,
}
196
197pub fn scan_incremental(
201 token: &str,
202 explicit_changed: Option<&[String]>,
203 opts: ScanProjectOptions,
204) -> IncrementalScan {
205 scan_incremental_with_git(token, explicit_changed, opts, &git::CliGitCapabilities)
206}
207
208pub fn scan_incremental_with_git(
210 token: &str,
211 explicit_changed: Option<&[String]>,
212 opts: ScanProjectOptions,
213 git: &dyn GitCapabilities,
214) -> IncrementalScan {
215 let root = snapshot::token_to_root(token);
216 let canonical = canonicalize(&root);
217
218 let cached = snapshot::load(&canonical);
219 let cached = match cached {
220 Some(c) => c,
221 None => {
222 let result = scan_project_with_git(&canonical, opts, git);
223 return IncrementalScan {
224 result,
225 delta: ScanDelta {
226 full_rescan: true,
227 ..ScanDelta::default()
228 },
229 };
230 }
231 };
232
233 let discover_opts = discover::DiscoverOptions {
234 include_hidden: opts.include_hidden,
235 respect_gitignore: opts.respect_gitignore,
236 };
237 let mut current = discover::discover_files(&canonical, discover_opts, git);
238 if opts.max_files > 0 && current.len() > opts.max_files {
239 current.truncate(opts.max_files);
240 }
241
242 let delta = compute_delta(¤t, &cached, explicit_changed);
243 let total = current.len();
244 let needs_full_rescan =
245 total > 0 && (delta.added.len() + delta.modified.len()) * 10 > total * 3;
246
247 if needs_full_rescan {
248 let result = scan_project_with_git(&canonical, opts, git);
249 return IncrementalScan {
250 result,
251 delta: ScanDelta {
252 full_rescan: true,
253 ..delta
254 },
255 };
256 }
257
258 if delta.added.is_empty() && delta.modified.is_empty() && delta.removed.is_empty() {
259 return IncrementalScan {
260 result: cached,
261 delta,
262 };
263 }
264
265 let mut files = cached.files;
267 let mut symbols = cached.symbols;
268 let mut dependencies = cached.dependencies;
269
270 let removed_set: std::collections::HashSet<&str> =
271 delta.removed.iter().map(|s| s.as_str()).collect();
272 let touched_set: std::collections::HashSet<&str> = delta
273 .added
274 .iter()
275 .chain(delta.modified.iter())
276 .map(|s| s.as_str())
277 .collect();
278
279 files.retain(|f| !removed_set.contains(f.relative_path.as_str()));
280 symbols.retain(|s| {
281 !removed_set.contains(s.file_path.as_str()) && !touched_set.contains(s.file_path.as_str())
282 });
283 dependencies.retain(|d| {
284 !removed_set.contains(d.from_file.as_str()) && !touched_set.contains(d.from_file.as_str())
285 });
286
287 let touched_entries: Vec<discover::DiscoveredFile> = current
288 .iter()
289 .filter(|e| touched_set.contains(e.relative_path.as_str()))
290 .cloned()
291 .collect();
292 let (new_files, new_symbols, new_deps) = extract_per_file(&touched_entries);
293
294 let mut by_path: std::collections::BTreeMap<String, FileRecord> = files
295 .into_iter()
296 .map(|f| (f.relative_path.clone(), f))
297 .collect();
298 for new_file in new_files {
299 by_path.insert(new_file.relative_path.clone(), new_file);
300 }
301 let mut files: Vec<FileRecord> = by_path.into_values().collect();
302 symbols.extend(new_symbols);
303 dependencies.extend(new_deps);
304
305 scoring::compute_reference_counts(&mut symbols, &files);
306 if opts.include_git_history {
307 let churn = git.churn_scores(&canonical);
308 scoring::apply_churn(&mut files, &churn);
309 }
310 scoring::compute_importance_scores(&mut symbols, &files);
311 test_mapping::map_test_files(&mut files);
312
313 let folder_records = folders::build_folder_records(&files, &symbols);
314 let test_commands = commands::detect_test_commands(&canonical);
315 let code_patterns = commands::detect_code_patterns(&files, &canonical);
316 let mut project = folders::build_project_metadata(
317 &canonical,
318 &files,
319 test_commands,
320 code_patterns,
321 now_iso8601(),
322 );
323 let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
324 let mut sub_projects = subproject::detect_subprojects(&canonical, 2);
325 attach_manifest_dependencies(&canonical, &mut project, &mut sub_projects);
326
327 sort_for_output(&mut files, &mut symbols, &mut dependencies);
328
329 let token = snapshot::root_to_token(&canonical);
330 let result = ScanResult {
331 snapshot_token: token,
332 truncated: cached.truncated,
333 project,
334 folders: folder_records,
335 files,
336 symbols,
337 dependencies,
338 sub_projects,
339 repo_map,
340 };
341 snapshot::save(&canonical, &result);
342 IncrementalScan { result, delta }
343}
344
/// Resolves `root` to its canonical form.
///
/// Falls back to `root` unchanged when canonicalization fails (for example
/// when the path does not exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
350
351fn attach_manifest_dependencies(
356 canonical: &Path,
357 project: &mut ProjectMetadata,
358 sub_projects: &mut [SubProject],
359) {
360 project.available_dependencies = manifest::directory_dependencies(canonical);
361 for sp in sub_projects.iter_mut() {
362 sp.dependencies = manifest::directory_dependencies(Path::new(&sp.path));
363 }
364}
365
366fn extract_per_file(
367 discovered: &[discover::DiscoveredFile],
368) -> (Vec<FileRecord>, Vec<SymbolRecord>, Vec<DependencyEdge>) {
369 let mut files: Vec<FileRecord> = Vec::with_capacity(discovered.len());
370 let mut symbols: Vec<SymbolRecord> = Vec::new();
371 let mut dependencies: Vec<DependencyEdge> = Vec::new();
372
373 for entry in discovered {
374 let metadata = std::fs::metadata(&entry.absolute_path);
375 let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
376 let modified = metadata
377 .as_ref()
378 .ok()
379 .and_then(|m| m.modified().ok())
380 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
381 .map(|d| d.as_millis() as i64)
382 .unwrap_or(0);
383
384 let content = std::fs::read_to_string(&entry.absolute_path).unwrap_or_default();
385 if content.is_empty() && size != 0 {
386 }
388 let language = extensions::file_extension(&entry.relative_path);
389 let imports = imports::extract_imports(&content, &language);
390 let file_symbols = symbols::extract_symbols(&content, &language, &entry.relative_path);
391 let line_count = crate::text::count_lines(content.as_bytes()) as usize;
392
393 for imp in &imports {
394 dependencies.push(DependencyEdge {
395 from_file: entry.relative_path.clone(),
396 to_module: imp.clone(),
397 });
398 }
399 symbols.extend(file_symbols);
400
401 files.push(FileRecord {
402 id: entry.relative_path.clone(),
403 relative_path: entry.relative_path.clone(),
404 file_name: extensions::file_name(&entry.relative_path).to_string(),
405 language,
406 line_count,
407 size_bytes: size,
408 last_modified_unix_ms: modified,
409 imports,
410 churn_score: 0.0,
411 corresponding_test_file: None,
412 });
413 }
414
415 (files, symbols, dependencies)
416}
417
418fn sort_for_output(
419 files: &mut [FileRecord],
420 symbols: &mut [SymbolRecord],
421 dependencies: &mut [DependencyEdge],
422) {
423 files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
424 symbols.sort_by(|a, b| a.id.cmp(&b.id));
425 dependencies.sort_by(|a, b| {
426 a.from_file
427 .cmp(&b.from_file)
428 .then_with(|| a.to_module.cmp(&b.to_module))
429 });
430}
431
432fn compute_delta(
433 current: &[discover::DiscoveredFile],
434 cached: &ScanResult,
435 explicit_changed: Option<&[String]>,
436) -> ScanDelta {
437 let cached_files: std::collections::BTreeMap<&str, &FileRecord> = cached
438 .files
439 .iter()
440 .map(|f| (f.relative_path.as_str(), f))
441 .collect();
442 let current_paths: std::collections::HashSet<&str> =
443 current.iter().map(|e| e.relative_path.as_str()).collect();
444
445 let added: Vec<String> = current
446 .iter()
447 .filter(|e| !cached_files.contains_key(e.relative_path.as_str()))
448 .map(|e| e.relative_path.clone())
449 .collect();
450 let removed: Vec<String> = cached
451 .files
452 .iter()
453 .filter(|f| !current_paths.contains(f.relative_path.as_str()))
454 .map(|f| f.relative_path.clone())
455 .collect();
456
457 let modified: Vec<String> = if let Some(explicit) = explicit_changed {
458 explicit
459 .iter()
460 .filter(|p| cached_files.contains_key(p.as_str()) && current_paths.contains(p.as_str()))
461 .cloned()
462 .collect()
463 } else {
464 let mut out = Vec::new();
465 for entry in current {
466 if let Some(prev) = cached_files.get(entry.relative_path.as_str()) {
467 let meta = std::fs::metadata(&entry.absolute_path).ok();
468 let mtime = meta
469 .as_ref()
470 .and_then(|m| m.modified().ok())
471 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
472 .map(|d| d.as_millis() as i64)
473 .unwrap_or(0);
474 let size = meta.as_ref().map(|m| m.len()).unwrap_or(prev.size_bytes);
475 if mtime > prev.last_modified_unix_ms || size != prev.size_bytes {
484 out.push(entry.relative_path.clone());
485 }
486 }
487 }
488 out
489 };
490
491 ScanDelta {
492 added,
493 modified,
494 removed,
495 full_rescan: false,
496 }
497}
498
499fn now_iso8601() -> String {
500 let now = SystemTime::now()
501 .duration_since(UNIX_EPOCH)
502 .unwrap_or_default();
503 let secs = now.as_secs() as i64;
504 let nanos = now.subsec_nanos();
505 let (year, month, day, hour, minute, second) = unix_to_civil(secs);
506 format!(
507 "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z",
508 millis = nanos / 1_000_000
509 )
510}
511
/// Converts seconds since the Unix epoch into UTC calendar fields.
///
/// Returns `(year, month, day, hour, minute, second)` with `month` in `1..=12`
/// and `day` in `1..=31`. Negative inputs (times before 1970) are handled via
/// Euclidean division. The date part works on 400-year eras of 146 097 days,
/// counting years from 1 March so the leap day falls at the end of the year.
fn unix_to_civil(secs: i64) -> (i64, u32, u32, u32, u32, u32) {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097;

    let days = secs.div_euclid(SECS_PER_DAY);
    let secs_of_day = secs.rem_euclid(SECS_PER_DAY);
    let hour = (secs_of_day / 3600) as u32;
    let minute = (secs_of_day % 3600 / 60) as u32;
    let second = (secs_of_day % 60) as u32;

    // Shift the epoch from 1970-01-01 to 0000-03-01.
    let shifted = days + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * month_index + 2) / 5 + 1) as u32;
    let month = (if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    }) as u32;
    // January and February belong to the following civil year.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);
    (year, month, day, hour, minute, second)
}
535
536fn scan_project_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
539 let raw = dict_arg(SCAN_PROJECT_BUILTIN, args)?;
540 let dict = raw.as_ref();
541 let root = require_string(SCAN_PROJECT_BUILTIN, dict, "root")?;
542 let opts = parse_options(SCAN_PROJECT_BUILTIN, dict)?;
543 let result = scan_project(Path::new(&root), opts);
544 Ok(scan_result_to_value(&result, None))
545}
546
547fn scan_incremental_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
548 let raw = dict_arg(SCAN_INCREMENTAL_BUILTIN, args)?;
549 let dict = raw.as_ref();
550 let token = require_string(SCAN_INCREMENTAL_BUILTIN, dict, "snapshot_token")?;
551 let opts = parse_options(SCAN_INCREMENTAL_BUILTIN, dict)?;
552 let changed = parse_changed_paths(SCAN_INCREMENTAL_BUILTIN, dict)?;
553 let scan = scan_incremental(&token, changed.as_deref(), opts);
554 Ok(scan_result_to_value(&scan.result, Some(&scan.delta)))
555}
556
557fn parse_options(
558 builtin: &'static str,
559 dict: &harn_vm::value::DictMap,
560) -> Result<ScanProjectOptions, HostlibError> {
561 let include_hidden = optional_bool(builtin, dict, "include_hidden", false)?;
562 let respect_gitignore = optional_bool(builtin, dict, "respect_gitignore", true)?;
563 let max_files = optional_int(builtin, dict, "max_files", 0)?;
564 let include_git_history_default = builtin == SCAN_PROJECT_BUILTIN;
565 let include_git_history = optional_bool(
566 builtin,
567 dict,
568 "include_git_history",
569 include_git_history_default,
570 )?;
571 let repo_map_token_budget = optional_int(builtin, dict, "repo_map_token_budget", 1200)?;
572 if max_files < 0 {
573 return Err(HostlibError::InvalidParameter {
574 builtin,
575 param: "max_files",
576 message: "must be >= 0".to_string(),
577 });
578 }
579 if repo_map_token_budget < 0 {
580 return Err(HostlibError::InvalidParameter {
581 builtin,
582 param: "repo_map_token_budget",
583 message: "must be >= 0".to_string(),
584 });
585 }
586 Ok(ScanProjectOptions {
587 include_hidden,
588 respect_gitignore,
589 max_files: max_files as usize,
590 include_git_history,
591 repo_map_token_budget: repo_map_token_budget as usize,
592 })
593}
594
595fn parse_changed_paths(
596 builtin: &'static str,
597 dict: &harn_vm::value::DictMap,
598) -> Result<Option<Vec<String>>, HostlibError> {
599 let value = match dict.get("changed_paths") {
600 None | Some(VmValue::Nil) => return Ok(None),
601 Some(v) => v,
602 };
603 let list = match value {
604 VmValue::List(items) => items,
605 other => {
606 return Err(HostlibError::InvalidParameter {
607 builtin,
608 param: "changed_paths",
609 message: format!("expected list of strings, got {}", other.type_name()),
610 });
611 }
612 };
613 let mut out = Vec::with_capacity(list.len());
614 for item in list.iter() {
615 match item {
616 VmValue::String(s) => out.push(s.to_string()),
617 other => {
618 return Err(HostlibError::InvalidParameter {
619 builtin,
620 param: "changed_paths",
621 message: format!("non-string entry: {}", other.type_name()),
622 });
623 }
624 }
625 }
626 Ok(Some(out))
627}
628
629fn scan_result_to_value(result: &ScanResult, delta: Option<&ScanDelta>) -> VmValue {
630 let mut entries: Vec<(&'static str, VmValue)> = vec![
631 ("snapshot_token", str_value(&result.snapshot_token)),
632 ("truncated", VmValue::Bool(result.truncated)),
633 ("project", project_to_value(&result.project)),
634 ("folders", list_of(&result.folders, folder_to_value)),
635 ("files", list_of(&result.files, file_to_value)),
636 ("symbols", list_of(&result.symbols, symbol_to_value)),
637 (
638 "dependencies",
639 list_of(&result.dependencies, dependency_to_value),
640 ),
641 (
642 "sub_projects",
643 list_of(&result.sub_projects, subproject_to_value),
644 ),
645 ("repo_map", str_value(&result.repo_map)),
646 ];
647 if let Some(d) = delta {
648 entries.push(("delta", delta_to_value(d)));
649 }
650 build_dict(entries)
651}
652
653fn list_of<T>(items: &[T], to_value: fn(&T) -> VmValue) -> VmValue {
654 let list: Vec<VmValue> = items.iter().map(to_value).collect();
655 VmValue::List(Arc::new(list))
656}
657
658fn project_to_value(project: &ProjectMetadata) -> VmValue {
659 let test_commands_entries: Vec<(String, VmValue)> = project
660 .test_commands
661 .iter()
662 .map(|(k, v)| (k.clone(), str_value(v)))
663 .collect();
664 let test_commands_dict = build_dict(test_commands_entries);
665
666 let detected: VmValue = project
667 .detected_test_command
668 .as_deref()
669 .map(str_value)
670 .unwrap_or(VmValue::Nil);
671
672 let code_patterns: Vec<VmValue> = project.code_patterns.iter().map(str_value).collect();
673 let available_dependencies: Vec<VmValue> = project
674 .available_dependencies
675 .iter()
676 .map(str_value)
677 .collect();
678
679 build_dict([
680 ("name", str_value(&project.name)),
681 ("root_path", str_value(&project.root_path)),
682 ("languages", list_of(&project.languages, language_to_value)),
683 ("test_commands", test_commands_dict),
684 ("detected_test_command", detected),
685 ("code_patterns", VmValue::List(Arc::new(code_patterns))),
686 ("total_files", VmValue::Int(project.total_files as i64)),
687 ("total_lines", VmValue::Int(project.total_lines as i64)),
688 ("last_scanned_at", str_value(&project.last_scanned_at)),
689 (
690 "available_dependencies",
691 VmValue::List(Arc::new(available_dependencies)),
692 ),
693 ])
694}
695
696fn language_to_value(stat: &LanguageStat) -> VmValue {
697 build_dict([
698 ("name", str_value(&stat.name)),
699 ("file_count", VmValue::Int(stat.file_count as i64)),
700 ("line_count", VmValue::Int(stat.line_count as i64)),
701 ("percentage", VmValue::Float(stat.percentage)),
702 ])
703}
704
705fn folder_to_value(folder: &FolderRecord) -> VmValue {
706 let names: Vec<VmValue> = folder.key_symbol_names.iter().map(str_value).collect();
707 build_dict([
708 ("id", str_value(&folder.id)),
709 ("relative_path", str_value(&folder.relative_path)),
710 ("file_count", VmValue::Int(folder.file_count as i64)),
711 ("line_count", VmValue::Int(folder.line_count as i64)),
712 ("dominant_language", str_value(&folder.dominant_language)),
713 ("key_symbol_names", VmValue::List(Arc::new(names))),
714 ])
715}
716
717fn file_to_value(file: &FileRecord) -> VmValue {
718 let imports: Vec<VmValue> = file.imports.iter().map(str_value).collect();
719 let test_pair = file
720 .corresponding_test_file
721 .as_deref()
722 .map(str_value)
723 .unwrap_or(VmValue::Nil);
724 build_dict([
725 ("id", str_value(&file.id)),
726 ("relative_path", str_value(&file.relative_path)),
727 ("file_name", str_value(&file.file_name)),
728 ("language", str_value(&file.language)),
729 ("line_count", VmValue::Int(file.line_count as i64)),
730 ("size_bytes", VmValue::Int(file.size_bytes as i64)),
731 (
732 "last_modified_unix_ms",
733 VmValue::Int(file.last_modified_unix_ms),
734 ),
735 ("imports", VmValue::List(Arc::new(imports))),
736 ("churn_score", VmValue::Float(file.churn_score)),
737 ("corresponding_test_file", test_pair),
738 ])
739}
740
741fn symbol_to_value(symbol: &SymbolRecord) -> VmValue {
742 let container = symbol
743 .container
744 .as_deref()
745 .map(str_value)
746 .unwrap_or(VmValue::Nil);
747 build_dict([
748 ("id", str_value(&symbol.id)),
749 ("name", str_value(&symbol.name)),
750 ("kind", str_value(symbol.kind.keyword())),
751 ("file_path", str_value(&symbol.file_path)),
752 ("line", VmValue::Int(symbol.line as i64)),
753 ("signature", str_value(&symbol.signature)),
754 ("container", container),
755 (
756 "reference_count",
757 VmValue::Int(symbol.reference_count as i64),
758 ),
759 ("importance_score", VmValue::Float(symbol.importance_score)),
760 ])
761}
762
763fn dependency_to_value(dep: &DependencyEdge) -> VmValue {
764 build_dict([
765 ("from_file", str_value(&dep.from_file)),
766 ("to_module", str_value(&dep.to_module)),
767 ])
768}
769
770fn subproject_to_value(sp: &SubProject) -> VmValue {
771 let dependencies: Vec<VmValue> = sp.dependencies.iter().map(str_value).collect();
772 build_dict([
773 ("path", str_value(&sp.path)),
774 ("name", str_value(&sp.name)),
775 ("language", str_value(&sp.language)),
776 ("project_marker", str_value(&sp.project_marker)),
777 ("dependencies", VmValue::List(Arc::new(dependencies))),
778 ])
779}
780
781fn delta_to_value(delta: &ScanDelta) -> VmValue {
782 let added: Vec<VmValue> = delta.added.iter().map(str_value).collect();
783 let modified: Vec<VmValue> = delta.modified.iter().map(str_value).collect();
784 let removed: Vec<VmValue> = delta.removed.iter().map(str_value).collect();
785 build_dict([
786 ("added", VmValue::List(Arc::new(added))),
787 ("modified", VmValue::List(Arc::new(modified))),
788 ("removed", VmValue::List(Arc::new(removed))),
789 ("full_rescan", VmValue::Bool(delta.full_rescan)),
790 ])
791}
792
#[cfg(test)]
mod tests {
    use super::*;
    use filetime::{set_file_mtime, FileTime};
    use std::fs;

    /// Relative path of the single source file used by the fixtures.
    const SEED_FILE: &str = "src/lib.rs";

    /// Creates a temporary project containing `src/lib.rs` with `contents`.
    ///
    /// Returns the temp-dir guard (keep it alive for the test's duration), the
    /// path of the seeded file, and the snapshot token for the project, which
    /// is the canonical root path rendered as a string.
    fn seed_project(contents: &str) -> (tempfile::TempDir, std::path::PathBuf, String) {
        let dir = tempfile::tempdir().unwrap();
        fs::create_dir_all(dir.path().join("src")).unwrap();
        let file = dir.path().join(SEED_FILE);
        fs::write(&file, contents).unwrap();

        let canonical = std::fs::canonicalize(dir.path()).unwrap();
        let token = canonical.to_string_lossy().to_string();
        (dir, file, token)
    }

    /// Names of all symbols in the scan result.
    fn symbol_names(scan: &IncrementalScan) -> Vec<String> {
        scan.result.symbols.iter().map(|s| s.name.clone()).collect()
    }

    /// Modification time (ms since the epoch) the scan recorded for the seed file.
    fn recorded_mtime(scan: &IncrementalScan) -> i64 {
        scan.result
            .files
            .iter()
            .find(|r| r.relative_path == SEED_FILE)
            .expect("seed file indexed")
            .last_modified_unix_ms
    }

    /// Forces the file's mtime to `unix_ms` so only size can reveal an edit.
    fn pin_mtime(file: &std::path::Path, unix_ms: i64) {
        let secs = unix_ms / 1000;
        let nanos = ((unix_ms % 1000) * 1_000_000) as u32;
        set_file_mtime(file, FileTime::from_unix_time(secs, nanos)).unwrap();
    }

    #[test]
    fn builtin_option_defaults_match_request_schemas() {
        let empty = harn_vm::value::DictMap::new();

        let full = parse_options(SCAN_PROJECT_BUILTIN, &empty).unwrap();
        let incremental = parse_options(SCAN_INCREMENTAL_BUILTIN, &empty).unwrap();

        assert!(full.include_git_history);
        assert!(!incremental.include_git_history);
    }

    /// An edit that keeps the mtime but changes the file size must still be
    /// picked up by the metadata heuristic.
    #[test]
    fn scan_incremental_detects_same_mtime_size_changing_edit() {
        let (_dir, file, token) = seed_project("pub fn old_symbol() {}\n");
        let opts = ScanProjectOptions::default();

        let first = scan_incremental(&token, None, opts.clone());
        let cached_mtime = recorded_mtime(&first);
        assert!(symbol_names(&first).iter().any(|n| n == "old_symbol"));

        // Grow the file, then rewind its mtime to the cached value.
        fs::write(
            &file,
            "pub fn old_symbol() {}\npub fn brand_new_symbol() {}\n",
        )
        .unwrap();
        pin_mtime(&file, cached_mtime);

        let second = scan_incremental(&token, None, opts);
        let names = symbol_names(&second);
        assert!(
            names.iter().any(|n| n == "brand_new_symbol"),
            "same-mtime size-changing edit must be reindexed, got {names:?} (delta.modified={:?})",
            second.delta.modified,
        );
    }

    /// An edit that keeps both mtime and size is invisible to the heuristic,
    /// but an explicit `changed_paths` hint must still force a reindex.
    #[test]
    fn scan_incremental_changed_paths_bypasses_metadata_heuristics() {
        let (_dir, file, token) = seed_project("pub fn alpha_name() {}\n");
        let opts = ScanProjectOptions::default();

        let first = scan_incremental(&token, None, opts.clone());
        let cached_mtime = recorded_mtime(&first);
        assert!(symbol_names(&first).iter().any(|n| n == "alpha_name"));

        // Same byte length as the seed content, and the mtime is rewound.
        fs::write(&file, "pub fn omega_name() {}\n").unwrap();
        pin_mtime(&file, cached_mtime);

        let heuristic_only = scan_incremental(&token, None, opts.clone());
        assert!(
            !heuristic_only
                .delta
                .modified
                .contains(&"src/lib.rs".to_string()),
            "documenting the heuristic's known blind spot",
        );

        let explicit = scan_incremental(&token, Some(&["src/lib.rs".to_string()]), opts);
        assert!(
            symbol_names(&explicit).iter().any(|n| n == "omega_name"),
            "explicit changed_paths must always reindex, got {:?}",
            symbol_names(&explicit),
        );
    }
}