Skip to main content

harn_hostlib/scanner/
mod.rs

1//! Repo scanner host capability.
2//!
3//! Deterministic project-wide file enumeration honoring `.gitignore` and
4//! the [`extensions::EXCLUDED_DIRS`] table, symbol extraction,
5//! import-derived dependency graph, reference + churn + importance
6//! scoring, source/test pairing, folder aggregates, project metadata
7//! (language stats + detected test commands + code-pattern hints),
8//! sub-project detection, and a token-budgeted text repo map.
9//!
10//! `scan_project` returns the full [`result::ScanResult`] alongside an
11//! opaque `snapshot_token` derived from the canonicalized root path. The
12//! result is persisted to `<root>/.harn/hostlib/scanner-snapshot.json` so
13//! that `scan_incremental` can diff against it later — without forcing the
14//! caller to pass the previous result back over the wire.
15
16use std::path::{Path, PathBuf};
17use std::process::Command;
18use std::rc::Rc;
19use std::sync::Arc;
20use std::time::{SystemTime, UNIX_EPOCH};
21
22use harn_vm::VmValue;
23
24use crate::error::HostlibError;
25use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
26use crate::tools::args::{
27    build_dict, dict_arg, optional_bool, optional_int, require_string, str_value,
28};
29
30mod commands;
31mod discover;
32mod extensions;
33mod folders;
34mod git;
35mod imports;
36mod result;
37mod scoring;
38mod snapshot;
39mod subproject;
40mod symbols;
41mod test_mapping;
42
/// Remove every ambient `GIT_*` environment variable from `cmd`.
///
/// Git exports repository-specific `GIT_*` variables while running hooks.
/// Scanner probes must honor their explicit `-C <root>` argument instead,
/// so the child process is spawned without them.
fn strip_ambient_git_env(cmd: &mut Command) {
    // `vars_os` rather than `vars`: the latter panics as soon as *any*
    // variable in the process environment (not only a `GIT_*` one) holds a
    // name or value that is not valid Unicode.
    for (key, _) in std::env::vars_os() {
        // A lossy conversion is enough for the prefix test: invalid bytes
        // become U+FFFD and can never spell the ASCII prefix `GIT_`.
        if key.to_string_lossy().starts_with("GIT_") {
            cmd.env_remove(&key);
        }
    }
}
52
53pub use git::GitCapabilities;
54pub use result::{
55    DependencyEdge, FileRecord, FolderRecord, LanguageStat, ProjectMetadata, ScanDelta, ScanResult,
56    SubProject, SymbolKind, SymbolRecord,
57};
58
59const SCAN_PROJECT_BUILTIN: &str = "hostlib_scanner_scan_project";
60const SCAN_INCREMENTAL_BUILTIN: &str = "hostlib_scanner_scan_incremental";
61
62/// Scanner capability handle.
63#[derive(Default)]
64pub struct ScannerCapability;
65
66impl HostlibCapability for ScannerCapability {
67    fn module_name(&self) -> &'static str {
68        "scanner"
69    }
70
71    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
72        let scan_project: SyncHandler = Arc::new(scan_project_handler);
73        registry.register(RegisteredBuiltin {
74            name: SCAN_PROJECT_BUILTIN,
75            module: "scanner",
76            method: "scan_project",
77            handler: scan_project,
78        });
79        let scan_incremental: SyncHandler = Arc::new(scan_incremental_handler);
80        registry.register(RegisteredBuiltin {
81            name: SCAN_INCREMENTAL_BUILTIN,
82            module: "scanner",
83            method: "scan_incremental",
84            handler: scan_incremental,
85        });
86    }
87}
88
89// MARK: - Public Rust API (used by tests + by harn-cli embedders).
90
/// Caller-tunable settings for [`scan_project`] and the incremental scan.
///
/// [`Default`] yields: hidden entries skipped, `.gitignore` honored, no
/// file cap, git history enabled, and a 1200-token repo map budget.
#[derive(Debug, Clone)]
pub struct ScanProjectOptions {
    /// Walk hidden (`.`-prefixed) entries as well.
    pub include_hidden: bool,
    /// Apply `.gitignore` rules while walking.
    pub respect_gitignore: bool,
    /// Maximum number of files to keep; `0` disables the cap.
    pub max_files: usize,
    /// Query git history to compute per-file churn scores.
    pub include_git_history: bool,
    /// Approximate token budget for the textual repo map.
    pub repo_map_token_budget: usize,
}

impl Default for ScanProjectOptions {
    /// Build the default option set described on the type.
    fn default() -> Self {
        ScanProjectOptions {
            // Switches that are on by default.
            respect_gitignore: true,
            include_git_history: true,
            // Switches that are off by default.
            include_hidden: false,
            // Numeric knobs: no file cap, 1200-token repo map.
            max_files: 0,
            repo_map_token_budget: 1200,
        }
    }
}
117
118/// Run a full scan of `root`, persist a snapshot, and return the result.
119pub fn scan_project(root: &Path, opts: ScanProjectOptions) -> ScanResult {
120    scan_project_with_git(root, opts, &git::CliGitCapabilities)
121}
122
123/// Run a full scan using caller-supplied Git data.
124///
125/// Embedders normally call [`scan_project`]. Tests and hosts that already
126/// virtualize Git can use this entry point to keep scanner behavior
127/// deterministic without depending on ambient process state.
128pub fn scan_project_with_git(
129    root: &Path,
130    opts: ScanProjectOptions,
131    git: &dyn GitCapabilities,
132) -> ScanResult {
133    let canonical = canonicalize(root);
134    let discover_opts = discover::DiscoverOptions {
135        include_hidden: opts.include_hidden,
136        respect_gitignore: opts.respect_gitignore,
137    };
138    let mut discovered = discover::discover_files(&canonical, discover_opts, git);
139    let truncated = if opts.max_files > 0 && discovered.len() > opts.max_files {
140        discovered.truncate(opts.max_files);
141        true
142    } else {
143        false
144    };
145
146    let (mut files, mut symbols, mut dependencies) = extract_per_file(&discovered);
147
148    scoring::compute_reference_counts(&mut symbols, &files);
149
150    if opts.include_git_history {
151        let churn = git.churn_scores(&canonical);
152        scoring::apply_churn(&mut files, &churn);
153    }
154    scoring::compute_importance_scores(&mut symbols, &files);
155
156    test_mapping::map_test_files(&mut files);
157
158    let folder_records = folders::build_folder_records(&files, &symbols);
159    let test_commands = commands::detect_test_commands(&canonical);
160    let code_patterns = commands::detect_code_patterns(&files, &canonical);
161    let project = folders::build_project_metadata(
162        &canonical,
163        &files,
164        test_commands,
165        code_patterns,
166        now_iso8601(),
167    );
168    let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
169    let sub_projects = subproject::detect_subprojects(&canonical, 2);
170
171    sort_for_output(&mut files, &mut symbols, &mut dependencies);
172
173    let token = snapshot::root_to_token(&canonical);
174    let result = ScanResult {
175        snapshot_token: token,
176        truncated,
177        project,
178        folders: folder_records,
179        files,
180        symbols,
181        dependencies,
182        sub_projects,
183        repo_map,
184    };
185    snapshot::save(&canonical, &result);
186    result
187}
188
189/// Result returned by [`scan_incremental`].
190#[derive(Clone, Debug)]
191pub struct IncrementalScan {
192    /// Refreshed scan result.
193    pub result: ScanResult,
194    /// Path delta computed against the snapshot.
195    pub delta: ScanDelta,
196}
197
198/// Refresh the snapshot named by `token`. If the snapshot is missing, the
199/// diff is too large (>30%), or `changed_paths` is empty after `>30%` of
200/// the workspace mtime-mismatched, falls back to a full rescan.
201pub fn scan_incremental(
202    token: &str,
203    explicit_changed: Option<&[String]>,
204    opts: ScanProjectOptions,
205) -> IncrementalScan {
206    scan_incremental_with_git(token, explicit_changed, opts, &git::CliGitCapabilities)
207}
208
209/// Refresh a snapshot using caller-supplied Git data.
210pub fn scan_incremental_with_git(
211    token: &str,
212    explicit_changed: Option<&[String]>,
213    opts: ScanProjectOptions,
214    git: &dyn GitCapabilities,
215) -> IncrementalScan {
216    let root = snapshot::token_to_root(token);
217    let canonical = canonicalize(&root);
218
219    let cached = snapshot::load(&canonical);
220    let cached = match cached {
221        Some(c) => c,
222        None => {
223            let result = scan_project_with_git(&canonical, opts, git);
224            return IncrementalScan {
225                result,
226                delta: ScanDelta {
227                    full_rescan: true,
228                    ..ScanDelta::default()
229                },
230            };
231        }
232    };
233
234    let discover_opts = discover::DiscoverOptions {
235        include_hidden: opts.include_hidden,
236        respect_gitignore: opts.respect_gitignore,
237    };
238    let mut current = discover::discover_files(&canonical, discover_opts, git);
239    if opts.max_files > 0 && current.len() > opts.max_files {
240        current.truncate(opts.max_files);
241    }
242
243    let delta = compute_delta(&current, &cached, explicit_changed);
244    let total = current.len();
245    let needs_full_rescan =
246        total > 0 && (delta.added.len() + delta.modified.len()) * 10 > total * 3;
247
248    if needs_full_rescan {
249        let result = scan_project_with_git(&canonical, opts, git);
250        return IncrementalScan {
251            result,
252            delta: ScanDelta {
253                full_rescan: true,
254                ..delta
255            },
256        };
257    }
258
259    if delta.added.is_empty() && delta.modified.is_empty() && delta.removed.is_empty() {
260        return IncrementalScan {
261            result: cached,
262            delta,
263        };
264    }
265
266    // Incremental path: rebuild only the touched files, then re-finalize.
267    let mut files = cached.files;
268    let mut symbols = cached.symbols;
269    let mut dependencies = cached.dependencies;
270
271    let removed_set: std::collections::HashSet<&str> =
272        delta.removed.iter().map(|s| s.as_str()).collect();
273    let touched_set: std::collections::HashSet<&str> = delta
274        .added
275        .iter()
276        .chain(delta.modified.iter())
277        .map(|s| s.as_str())
278        .collect();
279
280    files.retain(|f| !removed_set.contains(f.relative_path.as_str()));
281    symbols.retain(|s| {
282        !removed_set.contains(s.file_path.as_str()) && !touched_set.contains(s.file_path.as_str())
283    });
284    dependencies.retain(|d| {
285        !removed_set.contains(d.from_file.as_str()) && !touched_set.contains(d.from_file.as_str())
286    });
287
288    let touched_entries: Vec<discover::DiscoveredFile> = current
289        .iter()
290        .filter(|e| touched_set.contains(e.relative_path.as_str()))
291        .cloned()
292        .collect();
293    let (new_files, new_symbols, new_deps) = extract_per_file(&touched_entries);
294
295    let mut by_path: std::collections::BTreeMap<String, FileRecord> = files
296        .into_iter()
297        .map(|f| (f.relative_path.clone(), f))
298        .collect();
299    for new_file in new_files {
300        by_path.insert(new_file.relative_path.clone(), new_file);
301    }
302    let mut files: Vec<FileRecord> = by_path.into_values().collect();
303    symbols.extend(new_symbols);
304    dependencies.extend(new_deps);
305
306    scoring::compute_reference_counts(&mut symbols, &files);
307    if opts.include_git_history {
308        let churn = git.churn_scores(&canonical);
309        scoring::apply_churn(&mut files, &churn);
310    }
311    scoring::compute_importance_scores(&mut symbols, &files);
312    test_mapping::map_test_files(&mut files);
313
314    let folder_records = folders::build_folder_records(&files, &symbols);
315    let test_commands = commands::detect_test_commands(&canonical);
316    let code_patterns = commands::detect_code_patterns(&files, &canonical);
317    let project = folders::build_project_metadata(
318        &canonical,
319        &files,
320        test_commands,
321        code_patterns,
322        now_iso8601(),
323    );
324    let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
325    let sub_projects = subproject::detect_subprojects(&canonical, 2);
326
327    sort_for_output(&mut files, &mut symbols, &mut dependencies);
328
329    let token = snapshot::root_to_token(&canonical);
330    let result = ScanResult {
331        snapshot_token: token,
332        truncated: cached.truncated,
333        project,
334        folders: folder_records,
335        files,
336        symbols,
337        dependencies,
338        sub_projects,
339        repo_map,
340    };
341    snapshot::save(&canonical, &result);
342    IncrementalScan { result, delta }
343}
344
345// MARK: - Internals
346
/// Resolve `root` to a canonical path, falling back to `root` itself
/// (unchanged) when canonicalization fails.
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
350
351fn extract_per_file(
352    discovered: &[discover::DiscoveredFile],
353) -> (Vec<FileRecord>, Vec<SymbolRecord>, Vec<DependencyEdge>) {
354    let mut files: Vec<FileRecord> = Vec::with_capacity(discovered.len());
355    let mut symbols: Vec<SymbolRecord> = Vec::new();
356    let mut dependencies: Vec<DependencyEdge> = Vec::new();
357
358    for entry in discovered {
359        let metadata = std::fs::metadata(&entry.absolute_path);
360        let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
361        let modified = metadata
362            .as_ref()
363            .ok()
364            .and_then(|m| m.modified().ok())
365            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
366            .map(|d| d.as_millis() as i64)
367            .unwrap_or(0);
368
369        let content = std::fs::read_to_string(&entry.absolute_path).unwrap_or_default();
370        if content.is_empty() && size != 0 {
371            // Likely a non-utf8 binary; skip symbol/import extraction but still record the file.
372        }
373        let language = extensions::file_extension(&entry.relative_path);
374        let imports = imports::extract_imports(&content, &language);
375        let file_symbols = symbols::extract_symbols(&content, &language, &entry.relative_path);
376        let line_count = count_lines(&content);
377
378        for imp in &imports {
379            dependencies.push(DependencyEdge {
380                from_file: entry.relative_path.clone(),
381                to_module: imp.clone(),
382            });
383        }
384        symbols.extend(file_symbols);
385
386        files.push(FileRecord {
387            id: entry.relative_path.clone(),
388            relative_path: entry.relative_path.clone(),
389            file_name: extensions::file_name(&entry.relative_path).to_string(),
390            language,
391            line_count,
392            size_bytes: size,
393            last_modified_unix_ms: modified,
394            imports,
395            churn_score: 0.0,
396            corresponding_test_file: None,
397        });
398    }
399
400    (files, symbols, dependencies)
401}
402
/// Count the lines in `content`.
///
/// Every `\n` ends a line, and a final segment without a trailing `\n`
/// counts as one more line. Empty input has zero lines.
fn count_lines(content: &str) -> usize {
    // `split_inclusive` yields one piece per `\n`-terminated line plus one
    // for an unterminated tail, and nothing at all for the empty string.
    content.split_inclusive('\n').count()
}
411
412fn sort_for_output(
413    files: &mut [FileRecord],
414    symbols: &mut [SymbolRecord],
415    dependencies: &mut [DependencyEdge],
416) {
417    files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
418    symbols.sort_by(|a, b| a.id.cmp(&b.id));
419    dependencies.sort_by(|a, b| {
420        a.from_file
421            .cmp(&b.from_file)
422            .then_with(|| a.to_module.cmp(&b.to_module))
423    });
424}
425
426fn compute_delta(
427    current: &[discover::DiscoveredFile],
428    cached: &ScanResult,
429    explicit_changed: Option<&[String]>,
430) -> ScanDelta {
431    let cached_files: std::collections::BTreeMap<&str, &FileRecord> = cached
432        .files
433        .iter()
434        .map(|f| (f.relative_path.as_str(), f))
435        .collect();
436    let current_paths: std::collections::HashSet<&str> =
437        current.iter().map(|e| e.relative_path.as_str()).collect();
438
439    let added: Vec<String> = current
440        .iter()
441        .filter(|e| !cached_files.contains_key(e.relative_path.as_str()))
442        .map(|e| e.relative_path.clone())
443        .collect();
444    let removed: Vec<String> = cached
445        .files
446        .iter()
447        .filter(|f| !current_paths.contains(f.relative_path.as_str()))
448        .map(|f| f.relative_path.clone())
449        .collect();
450
451    let modified: Vec<String> = if let Some(explicit) = explicit_changed {
452        explicit
453            .iter()
454            .filter(|p| cached_files.contains_key(p.as_str()) && current_paths.contains(p.as_str()))
455            .cloned()
456            .collect()
457    } else {
458        let mut out = Vec::new();
459        for entry in current {
460            if let Some(prev) = cached_files.get(entry.relative_path.as_str()) {
461                let mtime = std::fs::metadata(&entry.absolute_path)
462                    .ok()
463                    .and_then(|m| m.modified().ok())
464                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
465                    .map(|d| d.as_millis() as i64)
466                    .unwrap_or(0);
467                if mtime > prev.last_modified_unix_ms {
468                    out.push(entry.relative_path.clone());
469                }
470            }
471        }
472        out
473    };
474
475    ScanDelta {
476        added,
477        modified,
478        removed,
479        full_rescan: false,
480    }
481}
482
483fn now_iso8601() -> String {
484    let now = SystemTime::now()
485        .duration_since(UNIX_EPOCH)
486        .unwrap_or_default();
487    let secs = now.as_secs() as i64;
488    let nanos = now.subsec_nanos();
489    let (year, month, day, hour, minute, second) = unix_to_civil(secs);
490    format!(
491        "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z",
492        millis = nanos / 1_000_000
493    )
494}
495
/// Split a unix timestamp (seconds, UTC) into
/// `(year, month, day, hour, minute, second)`.
///
/// Uses Howard Hinnant's days-to-civil algorithm so the crate does not
/// need `chrono` for a single formatter. Negative timestamps (before
/// 1970) are handled through Euclidean division.
fn unix_to_civil(secs: i64) -> (i64, u32, u32, u32, u32, u32) {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle
    const EPOCH_SHIFT_DAYS: i64 = 719_468; // offset added to days since 1970-01-01

    // Time-of-day part.
    let time_of_day = secs.rem_euclid(SECS_PER_DAY);
    let hour = (time_of_day / 3600) as u32;
    let minute = (time_of_day / 60 % 60) as u32;
    let second = (time_of_day % 60) as u32;

    // Date part: locate the day within its 400-year era.
    let shifted_days = secs.div_euclid(SECS_PER_DAY) + EPOCH_SHIFT_DAYS;
    let era = shifted_days.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted_days.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Months are counted from March here (index 0 = March), which puts the
    // leap day at the end of the counted year.
    let month_index = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * month_index + 2) / 5 + 1) as u32;
    let month = (if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    }) as u32;

    // January and February belong to the following calendar year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = march_based_year + i64::from(month <= 2);

    (year, month, day, hour, minute, second)
}
519
520// MARK: - Builtin handlers (Harn dict ↔ Rust struct).
521
522fn scan_project_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
523    let raw = dict_arg(SCAN_PROJECT_BUILTIN, args)?;
524    let dict = raw.as_ref();
525    let root = require_string(SCAN_PROJECT_BUILTIN, dict, "root")?;
526    let opts = parse_options(SCAN_PROJECT_BUILTIN, dict)?;
527    let result = scan_project(Path::new(&root), opts);
528    Ok(scan_result_to_value(&result, None))
529}
530
531fn scan_incremental_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
532    let raw = dict_arg(SCAN_INCREMENTAL_BUILTIN, args)?;
533    let dict = raw.as_ref();
534    let token = require_string(SCAN_INCREMENTAL_BUILTIN, dict, "snapshot_token")?;
535    let opts = parse_options(SCAN_INCREMENTAL_BUILTIN, dict)?;
536    let changed = parse_changed_paths(SCAN_INCREMENTAL_BUILTIN, dict)?;
537    let scan = scan_incremental(&token, changed.as_deref(), opts);
538    Ok(scan_result_to_value(&scan.result, Some(&scan.delta)))
539}
540
541fn parse_options(
542    builtin: &'static str,
543    dict: &std::collections::BTreeMap<String, VmValue>,
544) -> Result<ScanProjectOptions, HostlibError> {
545    let include_hidden = optional_bool(builtin, dict, "include_hidden", false)?;
546    let respect_gitignore = optional_bool(builtin, dict, "respect_gitignore", true)?;
547    let max_files = optional_int(builtin, dict, "max_files", 0)?;
548    let include_git_history_default = builtin == SCAN_PROJECT_BUILTIN;
549    let include_git_history = optional_bool(
550        builtin,
551        dict,
552        "include_git_history",
553        include_git_history_default,
554    )?;
555    let repo_map_token_budget = optional_int(builtin, dict, "repo_map_token_budget", 1200)?;
556    if max_files < 0 {
557        return Err(HostlibError::InvalidParameter {
558            builtin,
559            param: "max_files",
560            message: "must be >= 0".to_string(),
561        });
562    }
563    if repo_map_token_budget < 0 {
564        return Err(HostlibError::InvalidParameter {
565            builtin,
566            param: "repo_map_token_budget",
567            message: "must be >= 0".to_string(),
568        });
569    }
570    Ok(ScanProjectOptions {
571        include_hidden,
572        respect_gitignore,
573        max_files: max_files as usize,
574        include_git_history,
575        repo_map_token_budget: repo_map_token_budget as usize,
576    })
577}
578
579fn parse_changed_paths(
580    builtin: &'static str,
581    dict: &std::collections::BTreeMap<String, VmValue>,
582) -> Result<Option<Vec<String>>, HostlibError> {
583    let value = match dict.get("changed_paths") {
584        None | Some(VmValue::Nil) => return Ok(None),
585        Some(v) => v,
586    };
587    let list = match value {
588        VmValue::List(items) => items,
589        other => {
590            return Err(HostlibError::InvalidParameter {
591                builtin,
592                param: "changed_paths",
593                message: format!("expected list of strings, got {}", other.type_name()),
594            });
595        }
596    };
597    let mut out = Vec::with_capacity(list.len());
598    for item in list.iter() {
599        match item {
600            VmValue::String(s) => out.push(s.to_string()),
601            other => {
602                return Err(HostlibError::InvalidParameter {
603                    builtin,
604                    param: "changed_paths",
605                    message: format!("non-string entry: {}", other.type_name()),
606                });
607            }
608        }
609    }
610    Ok(Some(out))
611}
612
613fn scan_result_to_value(result: &ScanResult, delta: Option<&ScanDelta>) -> VmValue {
614    let mut entries: Vec<(&'static str, VmValue)> = vec![
615        ("snapshot_token", str_value(&result.snapshot_token)),
616        ("truncated", VmValue::Bool(result.truncated)),
617        ("project", project_to_value(&result.project)),
618        ("folders", list_of(&result.folders, folder_to_value)),
619        ("files", list_of(&result.files, file_to_value)),
620        ("symbols", list_of(&result.symbols, symbol_to_value)),
621        (
622            "dependencies",
623            list_of(&result.dependencies, dependency_to_value),
624        ),
625        (
626            "sub_projects",
627            list_of(&result.sub_projects, subproject_to_value),
628        ),
629        ("repo_map", str_value(&result.repo_map)),
630    ];
631    if let Some(d) = delta {
632        entries.push(("delta", delta_to_value(d)));
633    }
634    build_dict(entries)
635}
636
637fn list_of<T>(items: &[T], to_value: fn(&T) -> VmValue) -> VmValue {
638    let list: Vec<VmValue> = items.iter().map(to_value).collect();
639    VmValue::List(Rc::new(list))
640}
641
642fn project_to_value(project: &ProjectMetadata) -> VmValue {
643    let test_commands_entries: Vec<(String, VmValue)> = project
644        .test_commands
645        .iter()
646        .map(|(k, v)| (k.clone(), str_value(v)))
647        .collect();
648    let test_commands_dict = build_dict(test_commands_entries);
649
650    let detected: VmValue = project
651        .detected_test_command
652        .as_deref()
653        .map(str_value)
654        .unwrap_or(VmValue::Nil);
655
656    let code_patterns: Vec<VmValue> = project.code_patterns.iter().map(str_value).collect();
657
658    build_dict([
659        ("name", str_value(&project.name)),
660        ("root_path", str_value(&project.root_path)),
661        ("languages", list_of(&project.languages, language_to_value)),
662        ("test_commands", test_commands_dict),
663        ("detected_test_command", detected),
664        ("code_patterns", VmValue::List(Rc::new(code_patterns))),
665        ("total_files", VmValue::Int(project.total_files as i64)),
666        ("total_lines", VmValue::Int(project.total_lines as i64)),
667        ("last_scanned_at", str_value(&project.last_scanned_at)),
668    ])
669}
670
671fn language_to_value(stat: &LanguageStat) -> VmValue {
672    build_dict([
673        ("name", str_value(&stat.name)),
674        ("file_count", VmValue::Int(stat.file_count as i64)),
675        ("line_count", VmValue::Int(stat.line_count as i64)),
676        ("percentage", VmValue::Float(stat.percentage)),
677    ])
678}
679
680fn folder_to_value(folder: &FolderRecord) -> VmValue {
681    let names: Vec<VmValue> = folder.key_symbol_names.iter().map(str_value).collect();
682    build_dict([
683        ("id", str_value(&folder.id)),
684        ("relative_path", str_value(&folder.relative_path)),
685        ("file_count", VmValue::Int(folder.file_count as i64)),
686        ("line_count", VmValue::Int(folder.line_count as i64)),
687        ("dominant_language", str_value(&folder.dominant_language)),
688        ("key_symbol_names", VmValue::List(Rc::new(names))),
689    ])
690}
691
692fn file_to_value(file: &FileRecord) -> VmValue {
693    let imports: Vec<VmValue> = file.imports.iter().map(str_value).collect();
694    let test_pair = file
695        .corresponding_test_file
696        .as_deref()
697        .map(str_value)
698        .unwrap_or(VmValue::Nil);
699    build_dict([
700        ("id", str_value(&file.id)),
701        ("relative_path", str_value(&file.relative_path)),
702        ("file_name", str_value(&file.file_name)),
703        ("language", str_value(&file.language)),
704        ("line_count", VmValue::Int(file.line_count as i64)),
705        ("size_bytes", VmValue::Int(file.size_bytes as i64)),
706        (
707            "last_modified_unix_ms",
708            VmValue::Int(file.last_modified_unix_ms),
709        ),
710        ("imports", VmValue::List(Rc::new(imports))),
711        ("churn_score", VmValue::Float(file.churn_score)),
712        ("corresponding_test_file", test_pair),
713    ])
714}
715
716fn symbol_to_value(symbol: &SymbolRecord) -> VmValue {
717    let container = symbol
718        .container
719        .as_deref()
720        .map(str_value)
721        .unwrap_or(VmValue::Nil);
722    build_dict([
723        ("id", str_value(&symbol.id)),
724        ("name", str_value(&symbol.name)),
725        ("kind", str_value(symbol.kind.keyword())),
726        ("file_path", str_value(&symbol.file_path)),
727        ("line", VmValue::Int(symbol.line as i64)),
728        ("signature", str_value(&symbol.signature)),
729        ("container", container),
730        (
731            "reference_count",
732            VmValue::Int(symbol.reference_count as i64),
733        ),
734        ("importance_score", VmValue::Float(symbol.importance_score)),
735    ])
736}
737
738fn dependency_to_value(dep: &DependencyEdge) -> VmValue {
739    build_dict([
740        ("from_file", str_value(&dep.from_file)),
741        ("to_module", str_value(&dep.to_module)),
742    ])
743}
744
745fn subproject_to_value(sp: &SubProject) -> VmValue {
746    build_dict([
747        ("path", str_value(&sp.path)),
748        ("name", str_value(&sp.name)),
749        ("language", str_value(&sp.language)),
750        ("project_marker", str_value(&sp.project_marker)),
751    ])
752}
753
754fn delta_to_value(delta: &ScanDelta) -> VmValue {
755    let added: Vec<VmValue> = delta.added.iter().map(str_value).collect();
756    let modified: Vec<VmValue> = delta.modified.iter().map(str_value).collect();
757    let removed: Vec<VmValue> = delta.removed.iter().map(str_value).collect();
758    build_dict([
759        ("added", VmValue::List(Rc::new(added))),
760        ("modified", VmValue::List(Rc::new(modified))),
761        ("removed", VmValue::List(Rc::new(removed))),
762        ("full_rescan", VmValue::Bool(delta.full_rescan)),
763    ])
764}
765
#[cfg(test)]
mod tests {
    use super::*;

    /// With an empty request dict, git history is on by default for
    /// `scan_project` and off by default for `scan_incremental`.
    #[test]
    fn builtin_option_defaults_match_request_schemas() {
        let empty_request = std::collections::BTreeMap::new();

        let full_scan_opts = parse_options(SCAN_PROJECT_BUILTIN, &empty_request).unwrap();
        let incremental_opts = parse_options(SCAN_INCREMENTAL_BUILTIN, &empty_request).unwrap();

        assert!(full_scan_opts.include_git_history);
        assert!(!incremental_opts.include_git_history);
    }
}