1use std::path::{Path, PathBuf};
17use std::process::Command;
18use std::sync::Arc;
19use std::time::{SystemTime, UNIX_EPOCH};
20
21use harn_vm::VmValue;
22
23use crate::error::HostlibError;
24use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
25use crate::tools::args::{
26 build_dict, dict_arg, optional_bool, optional_int, require_string, str_value,
27};
28
29mod commands;
30mod discover;
31mod extensions;
32mod folders;
33mod git;
34mod imports;
35mod result;
36mod scoring;
37mod snapshot;
38mod subproject;
39mod symbols;
40mod test_mapping;
41
/// Marks every `GIT_*` variable of the current process environment for
/// removal on `cmd`, so the spawned child does not inherit them.
///
/// Presumably this keeps repository overrides set by the caller (the `GIT_`
/// prefix is the only criterion applied here) from leaking into child
/// processes — confirm against the call sites.
fn strip_ambient_git_env(cmd: &mut Command) {
    // `vars_os` rather than `vars`: the latter panics as soon as *any*
    // variable in the environment (not only a `GIT_*` one) contains data that
    // is not valid Unicode.
    for (key, _) in std::env::vars_os() {
        // A lossy conversion is sufficient for a prefix check; the original
        // `OsString` key is what gets removed.
        if key.to_string_lossy().starts_with("GIT_") {
            cmd.env_remove(&key);
        }
    }
}
51
52pub use git::GitCapabilities;
53pub use result::{
54 DependencyEdge, FileRecord, FolderRecord, LanguageStat, ProjectMetadata, ScanDelta, ScanResult,
55 SubProject, SymbolKind, SymbolRecord,
56};
57
/// Builtin name under which the full project scan handler is registered; also
/// passed to the argument helpers so errors name the offending builtin.
const SCAN_PROJECT_BUILTIN: &str = "hostlib_scanner_scan_project";
/// Builtin name under which the incremental scan handler is registered; also
/// passed to the argument helpers so errors name the offending builtin.
const SCAN_INCREMENTAL_BUILTIN: &str = "hostlib_scanner_scan_incremental";
60
61#[derive(Default)]
63pub struct ScannerCapability;
64
65impl HostlibCapability for ScannerCapability {
66 fn module_name(&self) -> &'static str {
67 "scanner"
68 }
69
70 fn register_builtins(&self, registry: &mut BuiltinRegistry) {
71 let scan_project: SyncHandler = Arc::new(scan_project_handler);
72 registry.register(RegisteredBuiltin {
73 name: SCAN_PROJECT_BUILTIN,
74 module: "scanner",
75 method: "scan_project",
76 handler: scan_project,
77 });
78 let scan_incremental: SyncHandler = Arc::new(scan_incremental_handler);
79 registry.register(RegisteredBuiltin {
80 name: SCAN_INCREMENTAL_BUILTIN,
81 module: "scanner",
82 method: "scan_incremental",
83 handler: scan_incremental,
84 });
85 }
86}
87
/// Tunables shared by the full and the incremental project scan.
#[derive(Clone, Debug)]
pub struct ScanProjectOptions {
    /// Forwarded to file discovery as `DiscoverOptions::include_hidden`.
    pub include_hidden: bool,
    /// Forwarded to file discovery as `DiscoverOptions::respect_gitignore`.
    pub respect_gitignore: bool,
    /// Upper bound on the number of discovered files that get processed.
    /// `0` disables the cap.
    pub max_files: usize,
    /// When set, churn scores are requested from the git backend and applied
    /// to the file records.
    pub include_git_history: bool,
    /// Budget handed to the repo-map builder.
    pub repo_map_token_budget: usize,
}

impl Default for ScanProjectOptions {
    /// Hidden files excluded, gitignore respected, no file cap, git history
    /// enabled, repo-map budget of 1200.
    fn default() -> Self {
        ScanProjectOptions {
            repo_map_token_budget: 1200,
            include_git_history: true,
            max_files: 0,
            respect_gitignore: true,
            include_hidden: false,
        }
    }
}
116
117pub fn scan_project(root: &Path, opts: ScanProjectOptions) -> ScanResult {
119 scan_project_with_git(root, opts, &git::CliGitCapabilities)
120}
121
122pub fn scan_project_with_git(
128 root: &Path,
129 opts: ScanProjectOptions,
130 git: &dyn GitCapabilities,
131) -> ScanResult {
132 let canonical = canonicalize(root);
133 let discover_opts = discover::DiscoverOptions {
134 include_hidden: opts.include_hidden,
135 respect_gitignore: opts.respect_gitignore,
136 };
137 let mut discovered = discover::discover_files(&canonical, discover_opts, git);
138 let truncated = if opts.max_files > 0 && discovered.len() > opts.max_files {
139 discovered.truncate(opts.max_files);
140 true
141 } else {
142 false
143 };
144
145 let (mut files, mut symbols, mut dependencies) = extract_per_file(&discovered);
146
147 scoring::compute_reference_counts(&mut symbols, &files);
148
149 if opts.include_git_history {
150 let churn = git.churn_scores(&canonical);
151 scoring::apply_churn(&mut files, &churn);
152 }
153 scoring::compute_importance_scores(&mut symbols, &files);
154
155 test_mapping::map_test_files(&mut files);
156
157 let folder_records = folders::build_folder_records(&files, &symbols);
158 let test_commands = commands::detect_test_commands(&canonical);
159 let code_patterns = commands::detect_code_patterns(&files, &canonical);
160 let project = folders::build_project_metadata(
161 &canonical,
162 &files,
163 test_commands,
164 code_patterns,
165 now_iso8601(),
166 );
167 let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
168 let sub_projects = subproject::detect_subprojects(&canonical, 2);
169
170 sort_for_output(&mut files, &mut symbols, &mut dependencies);
171
172 let token = snapshot::root_to_token(&canonical);
173 let result = ScanResult {
174 snapshot_token: token,
175 truncated,
176 project,
177 folders: folder_records,
178 files,
179 symbols,
180 dependencies,
181 sub_projects,
182 repo_map,
183 };
184 snapshot::save(&canonical, &result);
185 result
186}
187
/// Outcome of an incremental scan: the scan result together with the delta
/// that was computed against the cached snapshot.
#[derive(Clone, Debug)]
pub struct IncrementalScan {
    /// The scan result: the cached one when nothing changed, a full rescan
    /// result, or the cached result patched with the changed files.
    pub result: ScanResult,
    /// Added / modified / removed paths, plus whether a full rescan was run.
    pub delta: ScanDelta,
}
196
197pub fn scan_incremental(
201 token: &str,
202 explicit_changed: Option<&[String]>,
203 opts: ScanProjectOptions,
204) -> IncrementalScan {
205 scan_incremental_with_git(token, explicit_changed, opts, &git::CliGitCapabilities)
206}
207
208pub fn scan_incremental_with_git(
210 token: &str,
211 explicit_changed: Option<&[String]>,
212 opts: ScanProjectOptions,
213 git: &dyn GitCapabilities,
214) -> IncrementalScan {
215 let root = snapshot::token_to_root(token);
216 let canonical = canonicalize(&root);
217
218 let cached = snapshot::load(&canonical);
219 let cached = match cached {
220 Some(c) => c,
221 None => {
222 let result = scan_project_with_git(&canonical, opts, git);
223 return IncrementalScan {
224 result,
225 delta: ScanDelta {
226 full_rescan: true,
227 ..ScanDelta::default()
228 },
229 };
230 }
231 };
232
233 let discover_opts = discover::DiscoverOptions {
234 include_hidden: opts.include_hidden,
235 respect_gitignore: opts.respect_gitignore,
236 };
237 let mut current = discover::discover_files(&canonical, discover_opts, git);
238 if opts.max_files > 0 && current.len() > opts.max_files {
239 current.truncate(opts.max_files);
240 }
241
242 let delta = compute_delta(¤t, &cached, explicit_changed);
243 let total = current.len();
244 let needs_full_rescan =
245 total > 0 && (delta.added.len() + delta.modified.len()) * 10 > total * 3;
246
247 if needs_full_rescan {
248 let result = scan_project_with_git(&canonical, opts, git);
249 return IncrementalScan {
250 result,
251 delta: ScanDelta {
252 full_rescan: true,
253 ..delta
254 },
255 };
256 }
257
258 if delta.added.is_empty() && delta.modified.is_empty() && delta.removed.is_empty() {
259 return IncrementalScan {
260 result: cached,
261 delta,
262 };
263 }
264
265 let mut files = cached.files;
267 let mut symbols = cached.symbols;
268 let mut dependencies = cached.dependencies;
269
270 let removed_set: std::collections::HashSet<&str> =
271 delta.removed.iter().map(|s| s.as_str()).collect();
272 let touched_set: std::collections::HashSet<&str> = delta
273 .added
274 .iter()
275 .chain(delta.modified.iter())
276 .map(|s| s.as_str())
277 .collect();
278
279 files.retain(|f| !removed_set.contains(f.relative_path.as_str()));
280 symbols.retain(|s| {
281 !removed_set.contains(s.file_path.as_str()) && !touched_set.contains(s.file_path.as_str())
282 });
283 dependencies.retain(|d| {
284 !removed_set.contains(d.from_file.as_str()) && !touched_set.contains(d.from_file.as_str())
285 });
286
287 let touched_entries: Vec<discover::DiscoveredFile> = current
288 .iter()
289 .filter(|e| touched_set.contains(e.relative_path.as_str()))
290 .cloned()
291 .collect();
292 let (new_files, new_symbols, new_deps) = extract_per_file(&touched_entries);
293
294 let mut by_path: std::collections::BTreeMap<String, FileRecord> = files
295 .into_iter()
296 .map(|f| (f.relative_path.clone(), f))
297 .collect();
298 for new_file in new_files {
299 by_path.insert(new_file.relative_path.clone(), new_file);
300 }
301 let mut files: Vec<FileRecord> = by_path.into_values().collect();
302 symbols.extend(new_symbols);
303 dependencies.extend(new_deps);
304
305 scoring::compute_reference_counts(&mut symbols, &files);
306 if opts.include_git_history {
307 let churn = git.churn_scores(&canonical);
308 scoring::apply_churn(&mut files, &churn);
309 }
310 scoring::compute_importance_scores(&mut symbols, &files);
311 test_mapping::map_test_files(&mut files);
312
313 let folder_records = folders::build_folder_records(&files, &symbols);
314 let test_commands = commands::detect_test_commands(&canonical);
315 let code_patterns = commands::detect_code_patterns(&files, &canonical);
316 let project = folders::build_project_metadata(
317 &canonical,
318 &files,
319 test_commands,
320 code_patterns,
321 now_iso8601(),
322 );
323 let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
324 let sub_projects = subproject::detect_subprojects(&canonical, 2);
325
326 sort_for_output(&mut files, &mut symbols, &mut dependencies);
327
328 let token = snapshot::root_to_token(&canonical);
329 let result = ScanResult {
330 snapshot_token: token,
331 truncated: cached.truncated,
332 project,
333 folders: folder_records,
334 files,
335 symbols,
336 dependencies,
337 sub_projects,
338 repo_map,
339 };
340 snapshot::save(&canonical, &result);
341 IncrementalScan { result, delta }
342}
343
/// Resolves `root` to its canonical form; when resolution fails (for example
/// because the path does not exist) `root` is returned unchanged.
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
349
350fn extract_per_file(
351 discovered: &[discover::DiscoveredFile],
352) -> (Vec<FileRecord>, Vec<SymbolRecord>, Vec<DependencyEdge>) {
353 let mut files: Vec<FileRecord> = Vec::with_capacity(discovered.len());
354 let mut symbols: Vec<SymbolRecord> = Vec::new();
355 let mut dependencies: Vec<DependencyEdge> = Vec::new();
356
357 for entry in discovered {
358 let metadata = std::fs::metadata(&entry.absolute_path);
359 let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
360 let modified = metadata
361 .as_ref()
362 .ok()
363 .and_then(|m| m.modified().ok())
364 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
365 .map(|d| d.as_millis() as i64)
366 .unwrap_or(0);
367
368 let content = std::fs::read_to_string(&entry.absolute_path).unwrap_or_default();
369 if content.is_empty() && size != 0 {
370 }
372 let language = extensions::file_extension(&entry.relative_path);
373 let imports = imports::extract_imports(&content, &language);
374 let file_symbols = symbols::extract_symbols(&content, &language, &entry.relative_path);
375 let line_count = crate::text::count_lines(content.as_bytes()) as usize;
376
377 for imp in &imports {
378 dependencies.push(DependencyEdge {
379 from_file: entry.relative_path.clone(),
380 to_module: imp.clone(),
381 });
382 }
383 symbols.extend(file_symbols);
384
385 files.push(FileRecord {
386 id: entry.relative_path.clone(),
387 relative_path: entry.relative_path.clone(),
388 file_name: extensions::file_name(&entry.relative_path).to_string(),
389 language,
390 line_count,
391 size_bytes: size,
392 last_modified_unix_ms: modified,
393 imports,
394 churn_score: 0.0,
395 corresponding_test_file: None,
396 });
397 }
398
399 (files, symbols, dependencies)
400}
401
402fn sort_for_output(
403 files: &mut [FileRecord],
404 symbols: &mut [SymbolRecord],
405 dependencies: &mut [DependencyEdge],
406) {
407 files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
408 symbols.sort_by(|a, b| a.id.cmp(&b.id));
409 dependencies.sort_by(|a, b| {
410 a.from_file
411 .cmp(&b.from_file)
412 .then_with(|| a.to_module.cmp(&b.to_module))
413 });
414}
415
416fn compute_delta(
417 current: &[discover::DiscoveredFile],
418 cached: &ScanResult,
419 explicit_changed: Option<&[String]>,
420) -> ScanDelta {
421 let cached_files: std::collections::BTreeMap<&str, &FileRecord> = cached
422 .files
423 .iter()
424 .map(|f| (f.relative_path.as_str(), f))
425 .collect();
426 let current_paths: std::collections::HashSet<&str> =
427 current.iter().map(|e| e.relative_path.as_str()).collect();
428
429 let added: Vec<String> = current
430 .iter()
431 .filter(|e| !cached_files.contains_key(e.relative_path.as_str()))
432 .map(|e| e.relative_path.clone())
433 .collect();
434 let removed: Vec<String> = cached
435 .files
436 .iter()
437 .filter(|f| !current_paths.contains(f.relative_path.as_str()))
438 .map(|f| f.relative_path.clone())
439 .collect();
440
441 let modified: Vec<String> = if let Some(explicit) = explicit_changed {
442 explicit
443 .iter()
444 .filter(|p| cached_files.contains_key(p.as_str()) && current_paths.contains(p.as_str()))
445 .cloned()
446 .collect()
447 } else {
448 let mut out = Vec::new();
449 for entry in current {
450 if let Some(prev) = cached_files.get(entry.relative_path.as_str()) {
451 let meta = std::fs::metadata(&entry.absolute_path).ok();
452 let mtime = meta
453 .as_ref()
454 .and_then(|m| m.modified().ok())
455 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
456 .map(|d| d.as_millis() as i64)
457 .unwrap_or(0);
458 let size = meta.as_ref().map(|m| m.len()).unwrap_or(prev.size_bytes);
459 if mtime > prev.last_modified_unix_ms || size != prev.size_bytes {
468 out.push(entry.relative_path.clone());
469 }
470 }
471 }
472 out
473 };
474
475 ScanDelta {
476 added,
477 modified,
478 removed,
479 full_rescan: false,
480 }
481}
482
483fn now_iso8601() -> String {
484 let now = SystemTime::now()
485 .duration_since(UNIX_EPOCH)
486 .unwrap_or_default();
487 let secs = now.as_secs() as i64;
488 let nanos = now.subsec_nanos();
489 let (year, month, day, hour, minute, second) = unix_to_civil(secs);
490 format!(
491 "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z",
492 millis = nanos / 1_000_000
493 )
494}
495
/// Splits a Unix timestamp (seconds, may be negative) into
/// `(year, month, day, hour, minute, second)`.
///
/// The date part follows the usual 400-year-era days-to-civil arithmetic, in
/// which the internal year starts in March; January and February are
/// therefore attributed to the following year at the very end.
fn unix_to_civil(secs: i64) -> (i64, u32, u32, u32, u32, u32) {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097;

    // Euclidean division keeps the time of day non-negative before 1970.
    let days = secs.div_euclid(SECS_PER_DAY);
    let secs_of_day = secs.rem_euclid(SECS_PER_DAY);
    let hour = (secs_of_day / 3600) as u32;
    let minute = ((secs_of_day % 3600) / 60) as u32;
    let second = (secs_of_day % 60) as u32;

    let shifted_days = days + 719_468;
    let era = shifted_days.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted_days.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let month_index = (5 * day_of_year + 2) / 153;

    let day = (day_of_year - (153 * month_index + 2) / 5 + 1) as u32;
    let month = (if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    }) as u32;

    let era_year = year_of_era as i64 + era * 400;
    let year = if month <= 2 { era_year + 1 } else { era_year };

    (year, month, day, hour, minute, second)
}
519
520fn scan_project_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
523 let raw = dict_arg(SCAN_PROJECT_BUILTIN, args)?;
524 let dict = raw.as_ref();
525 let root = require_string(SCAN_PROJECT_BUILTIN, dict, "root")?;
526 let opts = parse_options(SCAN_PROJECT_BUILTIN, dict)?;
527 let result = scan_project(Path::new(&root), opts);
528 Ok(scan_result_to_value(&result, None))
529}
530
531fn scan_incremental_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
532 let raw = dict_arg(SCAN_INCREMENTAL_BUILTIN, args)?;
533 let dict = raw.as_ref();
534 let token = require_string(SCAN_INCREMENTAL_BUILTIN, dict, "snapshot_token")?;
535 let opts = parse_options(SCAN_INCREMENTAL_BUILTIN, dict)?;
536 let changed = parse_changed_paths(SCAN_INCREMENTAL_BUILTIN, dict)?;
537 let scan = scan_incremental(&token, changed.as_deref(), opts);
538 Ok(scan_result_to_value(&scan.result, Some(&scan.delta)))
539}
540
541fn parse_options(
542 builtin: &'static str,
543 dict: &std::collections::BTreeMap<String, VmValue>,
544) -> Result<ScanProjectOptions, HostlibError> {
545 let include_hidden = optional_bool(builtin, dict, "include_hidden", false)?;
546 let respect_gitignore = optional_bool(builtin, dict, "respect_gitignore", true)?;
547 let max_files = optional_int(builtin, dict, "max_files", 0)?;
548 let include_git_history_default = builtin == SCAN_PROJECT_BUILTIN;
549 let include_git_history = optional_bool(
550 builtin,
551 dict,
552 "include_git_history",
553 include_git_history_default,
554 )?;
555 let repo_map_token_budget = optional_int(builtin, dict, "repo_map_token_budget", 1200)?;
556 if max_files < 0 {
557 return Err(HostlibError::InvalidParameter {
558 builtin,
559 param: "max_files",
560 message: "must be >= 0".to_string(),
561 });
562 }
563 if repo_map_token_budget < 0 {
564 return Err(HostlibError::InvalidParameter {
565 builtin,
566 param: "repo_map_token_budget",
567 message: "must be >= 0".to_string(),
568 });
569 }
570 Ok(ScanProjectOptions {
571 include_hidden,
572 respect_gitignore,
573 max_files: max_files as usize,
574 include_git_history,
575 repo_map_token_budget: repo_map_token_budget as usize,
576 })
577}
578
579fn parse_changed_paths(
580 builtin: &'static str,
581 dict: &std::collections::BTreeMap<String, VmValue>,
582) -> Result<Option<Vec<String>>, HostlibError> {
583 let value = match dict.get("changed_paths") {
584 None | Some(VmValue::Nil) => return Ok(None),
585 Some(v) => v,
586 };
587 let list = match value {
588 VmValue::List(items) => items,
589 other => {
590 return Err(HostlibError::InvalidParameter {
591 builtin,
592 param: "changed_paths",
593 message: format!("expected list of strings, got {}", other.type_name()),
594 });
595 }
596 };
597 let mut out = Vec::with_capacity(list.len());
598 for item in list.iter() {
599 match item {
600 VmValue::String(s) => out.push(s.to_string()),
601 other => {
602 return Err(HostlibError::InvalidParameter {
603 builtin,
604 param: "changed_paths",
605 message: format!("non-string entry: {}", other.type_name()),
606 });
607 }
608 }
609 }
610 Ok(Some(out))
611}
612
613fn scan_result_to_value(result: &ScanResult, delta: Option<&ScanDelta>) -> VmValue {
614 let mut entries: Vec<(&'static str, VmValue)> = vec![
615 ("snapshot_token", str_value(&result.snapshot_token)),
616 ("truncated", VmValue::Bool(result.truncated)),
617 ("project", project_to_value(&result.project)),
618 ("folders", list_of(&result.folders, folder_to_value)),
619 ("files", list_of(&result.files, file_to_value)),
620 ("symbols", list_of(&result.symbols, symbol_to_value)),
621 (
622 "dependencies",
623 list_of(&result.dependencies, dependency_to_value),
624 ),
625 (
626 "sub_projects",
627 list_of(&result.sub_projects, subproject_to_value),
628 ),
629 ("repo_map", str_value(&result.repo_map)),
630 ];
631 if let Some(d) = delta {
632 entries.push(("delta", delta_to_value(d)));
633 }
634 build_dict(entries)
635}
636
637fn list_of<T>(items: &[T], to_value: fn(&T) -> VmValue) -> VmValue {
638 let list: Vec<VmValue> = items.iter().map(to_value).collect();
639 VmValue::List(Arc::new(list))
640}
641
642fn project_to_value(project: &ProjectMetadata) -> VmValue {
643 let test_commands_entries: Vec<(String, VmValue)> = project
644 .test_commands
645 .iter()
646 .map(|(k, v)| (k.clone(), str_value(v)))
647 .collect();
648 let test_commands_dict = build_dict(test_commands_entries);
649
650 let detected: VmValue = project
651 .detected_test_command
652 .as_deref()
653 .map(str_value)
654 .unwrap_or(VmValue::Nil);
655
656 let code_patterns: Vec<VmValue> = project.code_patterns.iter().map(str_value).collect();
657
658 build_dict([
659 ("name", str_value(&project.name)),
660 ("root_path", str_value(&project.root_path)),
661 ("languages", list_of(&project.languages, language_to_value)),
662 ("test_commands", test_commands_dict),
663 ("detected_test_command", detected),
664 ("code_patterns", VmValue::List(Arc::new(code_patterns))),
665 ("total_files", VmValue::Int(project.total_files as i64)),
666 ("total_lines", VmValue::Int(project.total_lines as i64)),
667 ("last_scanned_at", str_value(&project.last_scanned_at)),
668 ])
669}
670
671fn language_to_value(stat: &LanguageStat) -> VmValue {
672 build_dict([
673 ("name", str_value(&stat.name)),
674 ("file_count", VmValue::Int(stat.file_count as i64)),
675 ("line_count", VmValue::Int(stat.line_count as i64)),
676 ("percentage", VmValue::Float(stat.percentage)),
677 ])
678}
679
680fn folder_to_value(folder: &FolderRecord) -> VmValue {
681 let names: Vec<VmValue> = folder.key_symbol_names.iter().map(str_value).collect();
682 build_dict([
683 ("id", str_value(&folder.id)),
684 ("relative_path", str_value(&folder.relative_path)),
685 ("file_count", VmValue::Int(folder.file_count as i64)),
686 ("line_count", VmValue::Int(folder.line_count as i64)),
687 ("dominant_language", str_value(&folder.dominant_language)),
688 ("key_symbol_names", VmValue::List(Arc::new(names))),
689 ])
690}
691
692fn file_to_value(file: &FileRecord) -> VmValue {
693 let imports: Vec<VmValue> = file.imports.iter().map(str_value).collect();
694 let test_pair = file
695 .corresponding_test_file
696 .as_deref()
697 .map(str_value)
698 .unwrap_or(VmValue::Nil);
699 build_dict([
700 ("id", str_value(&file.id)),
701 ("relative_path", str_value(&file.relative_path)),
702 ("file_name", str_value(&file.file_name)),
703 ("language", str_value(&file.language)),
704 ("line_count", VmValue::Int(file.line_count as i64)),
705 ("size_bytes", VmValue::Int(file.size_bytes as i64)),
706 (
707 "last_modified_unix_ms",
708 VmValue::Int(file.last_modified_unix_ms),
709 ),
710 ("imports", VmValue::List(Arc::new(imports))),
711 ("churn_score", VmValue::Float(file.churn_score)),
712 ("corresponding_test_file", test_pair),
713 ])
714}
715
716fn symbol_to_value(symbol: &SymbolRecord) -> VmValue {
717 let container = symbol
718 .container
719 .as_deref()
720 .map(str_value)
721 .unwrap_or(VmValue::Nil);
722 build_dict([
723 ("id", str_value(&symbol.id)),
724 ("name", str_value(&symbol.name)),
725 ("kind", str_value(symbol.kind.keyword())),
726 ("file_path", str_value(&symbol.file_path)),
727 ("line", VmValue::Int(symbol.line as i64)),
728 ("signature", str_value(&symbol.signature)),
729 ("container", container),
730 (
731 "reference_count",
732 VmValue::Int(symbol.reference_count as i64),
733 ),
734 ("importance_score", VmValue::Float(symbol.importance_score)),
735 ])
736}
737
738fn dependency_to_value(dep: &DependencyEdge) -> VmValue {
739 build_dict([
740 ("from_file", str_value(&dep.from_file)),
741 ("to_module", str_value(&dep.to_module)),
742 ])
743}
744
745fn subproject_to_value(sp: &SubProject) -> VmValue {
746 build_dict([
747 ("path", str_value(&sp.path)),
748 ("name", str_value(&sp.name)),
749 ("language", str_value(&sp.language)),
750 ("project_marker", str_value(&sp.project_marker)),
751 ])
752}
753
754fn delta_to_value(delta: &ScanDelta) -> VmValue {
755 let added: Vec<VmValue> = delta.added.iter().map(str_value).collect();
756 let modified: Vec<VmValue> = delta.modified.iter().map(str_value).collect();
757 let removed: Vec<VmValue> = delta.removed.iter().map(str_value).collect();
758 build_dict([
759 ("added", VmValue::List(Arc::new(added))),
760 ("modified", VmValue::List(Arc::new(modified))),
761 ("removed", VmValue::List(Arc::new(removed))),
762 ("full_rescan", VmValue::Bool(delta.full_rescan)),
763 ])
764}
765
#[cfg(test)]
mod tests {
    use super::*;
    use filetime::{set_file_mtime, FileTime};
    use std::fs;

    /// With an empty argument dict, `include_git_history` defaults to `true`
    /// for the full-scan builtin and to `false` for the incremental builtin.
    #[test]
    fn builtin_option_defaults_match_request_schemas() {
        let dict = std::collections::BTreeMap::new();

        let scan_project = parse_options(SCAN_PROJECT_BUILTIN, &dict).unwrap();
        let scan_incremental = parse_options(SCAN_INCREMENTAL_BUILTIN, &dict).unwrap();

        assert!(scan_project.include_git_history);
        assert!(!scan_incremental.include_git_history);
    }

    /// Names of all symbols contained in an incremental scan's result.
    fn symbol_names(scan: &IncrementalScan) -> Vec<String> {
        scan.result.symbols.iter().map(|s| s.name.clone()).collect()
    }

    /// An edit that changes the file size must be picked up even when the
    /// file's mtime is identical to the cached one: the size comparison in
    /// `compute_delta` has to flag it on its own.
    #[test]
    fn scan_incremental_detects_same_mtime_size_changing_edit() {
        let dir = tempfile::tempdir().unwrap();
        fs::create_dir_all(dir.path().join("src")).unwrap();
        let file = dir.path().join("src/lib.rs");
        fs::write(&file, "pub fn old_symbol() {}\n").unwrap();

        let canonical = std::fs::canonicalize(dir.path()).unwrap();
        // The canonical path string is used directly as the snapshot token.
        let token = canonical.to_string_lossy().to_string();
        let opts = ScanProjectOptions::default();

        // Seeding scan; presumably a full scan since the temp dir is fresh.
        let first = scan_incremental(&token, None, opts.clone());
        let cached_mtime = first
            .result
            .files
            .iter()
            .find(|r| r.relative_path == "src/lib.rs")
            .expect("seed file indexed")
            .last_modified_unix_ms;
        assert!(symbol_names(&first).iter().any(|n| n == "old_symbol"));

        // Grow the file, then pin its mtime back to the cached value so only
        // the size differs from what the snapshot recorded.
        fs::write(
            &file,
            "pub fn old_symbol() {}\npub fn brand_new_symbol() {}\n",
        )
        .unwrap();
        let secs = cached_mtime / 1000;
        let nanos = ((cached_mtime % 1000) * 1_000_000) as u32;
        set_file_mtime(&file, FileTime::from_unix_time(secs, nanos)).unwrap();

        let second = scan_incremental(&token, None, opts);
        let names = symbol_names(&second);
        assert!(
            names.iter().any(|n| n == "brand_new_symbol"),
            "same-mtime size-changing edit must be reindexed, got {names:?} (delta.modified={:?})",
            second.delta.modified,
        );
    }

    /// A same-size, same-mtime edit is invisible to the metadata heuristic,
    /// but passing the path explicitly as changed must still reindex it.
    #[test]
    fn scan_incremental_changed_paths_bypasses_metadata_heuristics() {
        let dir = tempfile::tempdir().unwrap();
        fs::create_dir_all(dir.path().join("src")).unwrap();
        let file = dir.path().join("src/lib.rs");
        fs::write(&file, "pub fn alpha_name() {}\n").unwrap();

        let canonical = std::fs::canonicalize(dir.path()).unwrap();
        let token = canonical.to_string_lossy().to_string();
        let opts = ScanProjectOptions::default();

        let first = scan_incremental(&token, None, opts.clone());
        let cached_mtime = first
            .result
            .files
            .iter()
            .find(|r| r.relative_path == "src/lib.rs")
            .expect("seed file indexed")
            .last_modified_unix_ms;
        assert!(symbol_names(&first).iter().any(|n| n == "alpha_name"));

        // Replacement content has the same byte length as the original; the
        // mtime is then pinned back so neither size nor mtime differs.
        fs::write(&file, "pub fn omega_name() {}\n").unwrap();
        let secs = cached_mtime / 1000;
        let nanos = ((cached_mtime % 1000) * 1_000_000) as u32;
        set_file_mtime(&file, FileTime::from_unix_time(secs, nanos)).unwrap();

        // Without explicit paths the heuristic is expected to miss the edit.
        let heuristic_only = scan_incremental(&token, None, opts.clone());
        assert!(
            !heuristic_only
                .delta
                .modified
                .contains(&"src/lib.rs".to_string()),
            "documenting the heuristic's known blind spot",
        );

        // With the path passed explicitly, the new symbol must show up.
        let explicit = scan_incremental(&token, Some(&["src/lib.rs".to_string()]), opts);
        assert!(
            symbol_names(&explicit).iter().any(|n| n == "omega_name"),
            "explicit changed_paths must always reindex, got {:?}",
            symbol_names(&explicit),
        );
    }
}