// harn_hostlib/scanner/mod.rs

1//! Repo scanner host capability.
2//!
3//! Deterministic project-wide file enumeration honoring `.gitignore` and
4//! the [`extensions::EXCLUDED_DIRS`] table, symbol extraction,
5//! import-derived dependency graph, reference + churn + importance
6//! scoring, source/test pairing, folder aggregates, project metadata
7//! (language stats + detected test commands + code-pattern hints),
8//! sub-project detection, and a token-budgeted text repo map.
9//!
10//! `scan_project` returns the full [`result::ScanResult`] alongside an
11//! opaque `snapshot_token` derived from the canonicalized root path. The
12//! result is persisted to `<root>/.harn/hostlib/scanner-snapshot.json` so
13//! that `scan_incremental` can diff against it later — without forcing the
14//! caller to pass the previous result back over the wire.
15
16use std::path::{Path, PathBuf};
17use std::process::Command;
18use std::sync::Arc;
19use std::time::{SystemTime, UNIX_EPOCH};
20
21use harn_vm::VmValue;
22
23use crate::error::HostlibError;
24use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
25use crate::tools::args::{
26    build_dict, dict_arg, optional_bool, optional_int, require_string, str_value,
27};
28
29mod commands;
30mod discover;
31mod extensions;
32mod folders;
33mod git;
34mod imports;
35mod result;
36mod scoring;
37mod snapshot;
38mod subproject;
39mod symbols;
40mod test_mapping;
41
/// Remove every ambient `GIT_*` environment variable from `cmd`.
///
/// Git exports repository-specific `GIT_*` variables while running hooks.
/// Scanner probes must honor their explicit `-C <root>` argument instead of
/// inheriting that state.
///
/// The environment is enumerated with `std::env::vars_os` rather than
/// `std::env::vars`: the latter panics as soon as any variable name or value
/// in the process environment is not valid Unicode, which would take the
/// whole scan down for an unrelated variable.
fn strip_ambient_git_env(cmd: &mut Command) {
    for (key, _) in std::env::vars_os() {
        // A lossy conversion is sufficient for the prefix test: the ASCII
        // `GIT_` prefix survives it unchanged, and the untouched `OsString`
        // key is what actually gets removed from the command.
        if key.to_string_lossy().starts_with("GIT_") {
            cmd.env_remove(&key);
        }
    }
}
51
52pub use git::GitCapabilities;
53pub use result::{
54    DependencyEdge, FileRecord, FolderRecord, LanguageStat, ProjectMetadata, ScanDelta, ScanResult,
55    SubProject, SymbolKind, SymbolRecord,
56};
57
/// Registry name under which `scan_project_handler` is registered; also passed
/// to the argument helpers so their errors name the offending builtin.
const SCAN_PROJECT_BUILTIN: &str = "hostlib_scanner_scan_project";
/// Registry name under which `scan_incremental_handler` is registered; also
/// passed to the argument helpers so their errors name the offending builtin.
const SCAN_INCREMENTAL_BUILTIN: &str = "hostlib_scanner_scan_incremental";
60
61/// Scanner capability handle.
62#[derive(Default)]
63pub struct ScannerCapability;
64
65impl HostlibCapability for ScannerCapability {
66    fn module_name(&self) -> &'static str {
67        "scanner"
68    }
69
70    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
71        let scan_project: SyncHandler = Arc::new(scan_project_handler);
72        registry.register(RegisteredBuiltin {
73            name: SCAN_PROJECT_BUILTIN,
74            module: "scanner",
75            method: "scan_project",
76            handler: scan_project,
77        });
78        let scan_incremental: SyncHandler = Arc::new(scan_incremental_handler);
79        registry.register(RegisteredBuiltin {
80            name: SCAN_INCREMENTAL_BUILTIN,
81            module: "scanner",
82            method: "scan_incremental",
83            handler: scan_incremental,
84        });
85    }
86}
87
88// MARK: - Public Rust API (used by tests + by harn-cli embedders).
89
/// Options controlling a scan started through [`scan_project`].
#[derive(Clone, Debug)]
pub struct ScanProjectOptions {
    /// Whether hidden (`.`-prefixed) entries are walked.
    pub include_hidden: bool,
    /// Whether `.gitignore` rules are honored during discovery.
    pub respect_gitignore: bool,
    /// Maximum number of files to keep; `0` disables the cap.
    pub max_files: usize,
    /// Whether `git log` is consulted to compute churn scores.
    pub include_git_history: bool,
    /// Approximate token budget for the text repo map.
    pub repo_map_token_budget: usize,
}

impl Default for ScanProjectOptions {
    /// Defaults: hidden entries skipped, `.gitignore` honored, no file cap,
    /// git history enabled, and a 1200-token repo map budget.
    fn default() -> Self {
        ScanProjectOptions {
            max_files: 0,
            repo_map_token_budget: 1200,
            include_hidden: false,
            respect_gitignore: true,
            include_git_history: true,
        }
    }
}
116
117/// Run a full scan of `root`, persist a snapshot, and return the result.
118pub fn scan_project(root: &Path, opts: ScanProjectOptions) -> ScanResult {
119    scan_project_with_git(root, opts, &git::CliGitCapabilities)
120}
121
122/// Run a full scan using caller-supplied Git data.
123///
124/// Embedders normally call [`scan_project`]. Tests and hosts that already
125/// virtualize Git can use this entry point to keep scanner behavior
126/// deterministic without depending on ambient process state.
127pub fn scan_project_with_git(
128    root: &Path,
129    opts: ScanProjectOptions,
130    git: &dyn GitCapabilities,
131) -> ScanResult {
132    let canonical = canonicalize(root);
133    let discover_opts = discover::DiscoverOptions {
134        include_hidden: opts.include_hidden,
135        respect_gitignore: opts.respect_gitignore,
136    };
137    let mut discovered = discover::discover_files(&canonical, discover_opts, git);
138    let truncated = if opts.max_files > 0 && discovered.len() > opts.max_files {
139        discovered.truncate(opts.max_files);
140        true
141    } else {
142        false
143    };
144
145    let (mut files, mut symbols, mut dependencies) = extract_per_file(&discovered);
146
147    scoring::compute_reference_counts(&mut symbols, &files);
148
149    if opts.include_git_history {
150        let churn = git.churn_scores(&canonical);
151        scoring::apply_churn(&mut files, &churn);
152    }
153    scoring::compute_importance_scores(&mut symbols, &files);
154
155    test_mapping::map_test_files(&mut files);
156
157    let folder_records = folders::build_folder_records(&files, &symbols);
158    let test_commands = commands::detect_test_commands(&canonical);
159    let code_patterns = commands::detect_code_patterns(&files, &canonical);
160    let project = folders::build_project_metadata(
161        &canonical,
162        &files,
163        test_commands,
164        code_patterns,
165        now_iso8601(),
166    );
167    let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
168    let sub_projects = subproject::detect_subprojects(&canonical, 2);
169
170    sort_for_output(&mut files, &mut symbols, &mut dependencies);
171
172    let token = snapshot::root_to_token(&canonical);
173    let result = ScanResult {
174        snapshot_token: token,
175        truncated,
176        project,
177        folders: folder_records,
178        files,
179        symbols,
180        dependencies,
181        sub_projects,
182        repo_map,
183    };
184    snapshot::save(&canonical, &result);
185    result
186}
187
/// Result returned by [`scan_incremental`].
///
/// Pairs the refreshed scan with the path-level delta computed against the
/// previously persisted snapshot.
#[derive(Clone, Debug)]
pub struct IncrementalScan {
    /// Refreshed scan result (the cached result itself when nothing changed).
    pub result: ScanResult,
    /// Path delta computed against the snapshot; its `full_rescan` flag is set
    /// when the refresh fell back to a full scan.
    pub delta: ScanDelta,
}
196
197/// Refresh the snapshot named by `token`. If the snapshot is missing, the
198/// diff is too large (>30%), or `changed_paths` is empty after `>30%` of
199/// the workspace mtime-mismatched, falls back to a full rescan.
200pub fn scan_incremental(
201    token: &str,
202    explicit_changed: Option<&[String]>,
203    opts: ScanProjectOptions,
204) -> IncrementalScan {
205    scan_incremental_with_git(token, explicit_changed, opts, &git::CliGitCapabilities)
206}
207
208/// Refresh a snapshot using caller-supplied Git data.
209pub fn scan_incremental_with_git(
210    token: &str,
211    explicit_changed: Option<&[String]>,
212    opts: ScanProjectOptions,
213    git: &dyn GitCapabilities,
214) -> IncrementalScan {
215    let root = snapshot::token_to_root(token);
216    let canonical = canonicalize(&root);
217
218    let cached = snapshot::load(&canonical);
219    let cached = match cached {
220        Some(c) => c,
221        None => {
222            let result = scan_project_with_git(&canonical, opts, git);
223            return IncrementalScan {
224                result,
225                delta: ScanDelta {
226                    full_rescan: true,
227                    ..ScanDelta::default()
228                },
229            };
230        }
231    };
232
233    let discover_opts = discover::DiscoverOptions {
234        include_hidden: opts.include_hidden,
235        respect_gitignore: opts.respect_gitignore,
236    };
237    let mut current = discover::discover_files(&canonical, discover_opts, git);
238    if opts.max_files > 0 && current.len() > opts.max_files {
239        current.truncate(opts.max_files);
240    }
241
242    let delta = compute_delta(&current, &cached, explicit_changed);
243    let total = current.len();
244    let needs_full_rescan =
245        total > 0 && (delta.added.len() + delta.modified.len()) * 10 > total * 3;
246
247    if needs_full_rescan {
248        let result = scan_project_with_git(&canonical, opts, git);
249        return IncrementalScan {
250            result,
251            delta: ScanDelta {
252                full_rescan: true,
253                ..delta
254            },
255        };
256    }
257
258    if delta.added.is_empty() && delta.modified.is_empty() && delta.removed.is_empty() {
259        return IncrementalScan {
260            result: cached,
261            delta,
262        };
263    }
264
265    // Incremental path: rebuild only the touched files, then re-finalize.
266    let mut files = cached.files;
267    let mut symbols = cached.symbols;
268    let mut dependencies = cached.dependencies;
269
270    let removed_set: std::collections::HashSet<&str> =
271        delta.removed.iter().map(|s| s.as_str()).collect();
272    let touched_set: std::collections::HashSet<&str> = delta
273        .added
274        .iter()
275        .chain(delta.modified.iter())
276        .map(|s| s.as_str())
277        .collect();
278
279    files.retain(|f| !removed_set.contains(f.relative_path.as_str()));
280    symbols.retain(|s| {
281        !removed_set.contains(s.file_path.as_str()) && !touched_set.contains(s.file_path.as_str())
282    });
283    dependencies.retain(|d| {
284        !removed_set.contains(d.from_file.as_str()) && !touched_set.contains(d.from_file.as_str())
285    });
286
287    let touched_entries: Vec<discover::DiscoveredFile> = current
288        .iter()
289        .filter(|e| touched_set.contains(e.relative_path.as_str()))
290        .cloned()
291        .collect();
292    let (new_files, new_symbols, new_deps) = extract_per_file(&touched_entries);
293
294    let mut by_path: std::collections::BTreeMap<String, FileRecord> = files
295        .into_iter()
296        .map(|f| (f.relative_path.clone(), f))
297        .collect();
298    for new_file in new_files {
299        by_path.insert(new_file.relative_path.clone(), new_file);
300    }
301    let mut files: Vec<FileRecord> = by_path.into_values().collect();
302    symbols.extend(new_symbols);
303    dependencies.extend(new_deps);
304
305    scoring::compute_reference_counts(&mut symbols, &files);
306    if opts.include_git_history {
307        let churn = git.churn_scores(&canonical);
308        scoring::apply_churn(&mut files, &churn);
309    }
310    scoring::compute_importance_scores(&mut symbols, &files);
311    test_mapping::map_test_files(&mut files);
312
313    let folder_records = folders::build_folder_records(&files, &symbols);
314    let test_commands = commands::detect_test_commands(&canonical);
315    let code_patterns = commands::detect_code_patterns(&files, &canonical);
316    let project = folders::build_project_metadata(
317        &canonical,
318        &files,
319        test_commands,
320        code_patterns,
321        now_iso8601(),
322    );
323    let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
324    let sub_projects = subproject::detect_subprojects(&canonical, 2);
325
326    sort_for_output(&mut files, &mut symbols, &mut dependencies);
327
328    let token = snapshot::root_to_token(&canonical);
329    let result = ScanResult {
330        snapshot_token: token,
331        truncated: cached.truncated,
332        project,
333        folders: folder_records,
334        files,
335        symbols,
336        dependencies,
337        sub_projects,
338        repo_map,
339    };
340    snapshot::save(&canonical, &result);
341    IncrementalScan { result, delta }
342}
343
344// MARK: - Internals
345
/// Canonicalize `root`, falling back to the path as given when
/// canonicalization fails (for example because it does not exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
349
350fn extract_per_file(
351    discovered: &[discover::DiscoveredFile],
352) -> (Vec<FileRecord>, Vec<SymbolRecord>, Vec<DependencyEdge>) {
353    let mut files: Vec<FileRecord> = Vec::with_capacity(discovered.len());
354    let mut symbols: Vec<SymbolRecord> = Vec::new();
355    let mut dependencies: Vec<DependencyEdge> = Vec::new();
356
357    for entry in discovered {
358        let metadata = std::fs::metadata(&entry.absolute_path);
359        let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
360        let modified = metadata
361            .as_ref()
362            .ok()
363            .and_then(|m| m.modified().ok())
364            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
365            .map(|d| d.as_millis() as i64)
366            .unwrap_or(0);
367
368        let content = std::fs::read_to_string(&entry.absolute_path).unwrap_or_default();
369        if content.is_empty() && size != 0 {
370            // Likely a non-utf8 binary; skip symbol/import extraction but still record the file.
371        }
372        let language = extensions::file_extension(&entry.relative_path);
373        let imports = imports::extract_imports(&content, &language);
374        let file_symbols = symbols::extract_symbols(&content, &language, &entry.relative_path);
375        let line_count = count_lines(&content);
376
377        for imp in &imports {
378            dependencies.push(DependencyEdge {
379                from_file: entry.relative_path.clone(),
380                to_module: imp.clone(),
381            });
382        }
383        symbols.extend(file_symbols);
384
385        files.push(FileRecord {
386            id: entry.relative_path.clone(),
387            relative_path: entry.relative_path.clone(),
388            file_name: extensions::file_name(&entry.relative_path).to_string(),
389            language,
390            line_count,
391            size_bytes: size,
392            last_modified_unix_ms: modified,
393            imports,
394            churn_score: 0.0,
395            corresponding_test_file: None,
396        });
397    }
398
399    (files, symbols, dependencies)
400}
401
/// Count the lines in `content`.
///
/// Empty content has zero lines; a final line without a trailing `\n` still
/// counts as a line.
fn count_lines(content: &str) -> usize {
    // `str::lines` splits on `\n` and does not yield an extra empty item after
    // a trailing newline, which is exactly "newline count, plus one if the
    // text does not end in a newline".
    content.lines().count()
}
410
411fn sort_for_output(
412    files: &mut [FileRecord],
413    symbols: &mut [SymbolRecord],
414    dependencies: &mut [DependencyEdge],
415) {
416    files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
417    symbols.sort_by(|a, b| a.id.cmp(&b.id));
418    dependencies.sort_by(|a, b| {
419        a.from_file
420            .cmp(&b.from_file)
421            .then_with(|| a.to_module.cmp(&b.to_module))
422    });
423}
424
425fn compute_delta(
426    current: &[discover::DiscoveredFile],
427    cached: &ScanResult,
428    explicit_changed: Option<&[String]>,
429) -> ScanDelta {
430    let cached_files: std::collections::BTreeMap<&str, &FileRecord> = cached
431        .files
432        .iter()
433        .map(|f| (f.relative_path.as_str(), f))
434        .collect();
435    let current_paths: std::collections::HashSet<&str> =
436        current.iter().map(|e| e.relative_path.as_str()).collect();
437
438    let added: Vec<String> = current
439        .iter()
440        .filter(|e| !cached_files.contains_key(e.relative_path.as_str()))
441        .map(|e| e.relative_path.clone())
442        .collect();
443    let removed: Vec<String> = cached
444        .files
445        .iter()
446        .filter(|f| !current_paths.contains(f.relative_path.as_str()))
447        .map(|f| f.relative_path.clone())
448        .collect();
449
450    let modified: Vec<String> = if let Some(explicit) = explicit_changed {
451        explicit
452            .iter()
453            .filter(|p| cached_files.contains_key(p.as_str()) && current_paths.contains(p.as_str()))
454            .cloned()
455            .collect()
456    } else {
457        let mut out = Vec::new();
458        for entry in current {
459            if let Some(prev) = cached_files.get(entry.relative_path.as_str()) {
460                let mtime = std::fs::metadata(&entry.absolute_path)
461                    .ok()
462                    .and_then(|m| m.modified().ok())
463                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
464                    .map(|d| d.as_millis() as i64)
465                    .unwrap_or(0);
466                if mtime > prev.last_modified_unix_ms {
467                    out.push(entry.relative_path.clone());
468                }
469            }
470        }
471        out
472    };
473
474    ScanDelta {
475        added,
476        modified,
477        removed,
478        full_rescan: false,
479    }
480}
481
482fn now_iso8601() -> String {
483    let now = SystemTime::now()
484        .duration_since(UNIX_EPOCH)
485        .unwrap_or_default();
486    let secs = now.as_secs() as i64;
487    let nanos = now.subsec_nanos();
488    let (year, month, day, hour, minute, second) = unix_to_civil(secs);
489    format!(
490        "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z",
491        millis = nanos / 1_000_000
492    )
493}
494
/// Split a Unix timestamp (seconds, UTC) into
/// `(year, month, day, hour, minute, second)`.
///
/// Implements Howard Hinnant's days-to-civil algorithm so that a single
/// formatter does not require pulling in `chrono`.
fn unix_to_civil(secs: i64) -> (i64, u32, u32, u32, u32, u32) {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle

    // Euclidean division keeps the time-of-day non-negative for pre-1970 inputs.
    let days = secs.div_euclid(SECS_PER_DAY);
    let second_of_day = secs.rem_euclid(SECS_PER_DAY);
    let hour = (second_of_day / 3600) as u32;
    let minute = (second_of_day / 60 % 60) as u32;
    let second = (second_of_day % 60) as u32;

    // Shift the day count so that eras start on 0000-03-01.
    let shifted = days + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // `month_index` counts months from March (0) through February (11).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * month_index + 2) / 5 + 1) as u32;
    let month = (if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    }) as u32;

    // January and February belong to the following civil year.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);
    (year, month, day, hour, minute, second)
}
518
519// MARK: - Builtin handlers (Harn dict ↔ Rust struct).
520
521fn scan_project_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
522    let raw = dict_arg(SCAN_PROJECT_BUILTIN, args)?;
523    let dict = raw.as_ref();
524    let root = require_string(SCAN_PROJECT_BUILTIN, dict, "root")?;
525    let opts = parse_options(SCAN_PROJECT_BUILTIN, dict)?;
526    let result = scan_project(Path::new(&root), opts);
527    Ok(scan_result_to_value(&result, None))
528}
529
530fn scan_incremental_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
531    let raw = dict_arg(SCAN_INCREMENTAL_BUILTIN, args)?;
532    let dict = raw.as_ref();
533    let token = require_string(SCAN_INCREMENTAL_BUILTIN, dict, "snapshot_token")?;
534    let opts = parse_options(SCAN_INCREMENTAL_BUILTIN, dict)?;
535    let changed = parse_changed_paths(SCAN_INCREMENTAL_BUILTIN, dict)?;
536    let scan = scan_incremental(&token, changed.as_deref(), opts);
537    Ok(scan_result_to_value(&scan.result, Some(&scan.delta)))
538}
539
540fn parse_options(
541    builtin: &'static str,
542    dict: &std::collections::BTreeMap<String, VmValue>,
543) -> Result<ScanProjectOptions, HostlibError> {
544    let include_hidden = optional_bool(builtin, dict, "include_hidden", false)?;
545    let respect_gitignore = optional_bool(builtin, dict, "respect_gitignore", true)?;
546    let max_files = optional_int(builtin, dict, "max_files", 0)?;
547    let include_git_history_default = builtin == SCAN_PROJECT_BUILTIN;
548    let include_git_history = optional_bool(
549        builtin,
550        dict,
551        "include_git_history",
552        include_git_history_default,
553    )?;
554    let repo_map_token_budget = optional_int(builtin, dict, "repo_map_token_budget", 1200)?;
555    if max_files < 0 {
556        return Err(HostlibError::InvalidParameter {
557            builtin,
558            param: "max_files",
559            message: "must be >= 0".to_string(),
560        });
561    }
562    if repo_map_token_budget < 0 {
563        return Err(HostlibError::InvalidParameter {
564            builtin,
565            param: "repo_map_token_budget",
566            message: "must be >= 0".to_string(),
567        });
568    }
569    Ok(ScanProjectOptions {
570        include_hidden,
571        respect_gitignore,
572        max_files: max_files as usize,
573        include_git_history,
574        repo_map_token_budget: repo_map_token_budget as usize,
575    })
576}
577
578fn parse_changed_paths(
579    builtin: &'static str,
580    dict: &std::collections::BTreeMap<String, VmValue>,
581) -> Result<Option<Vec<String>>, HostlibError> {
582    let value = match dict.get("changed_paths") {
583        None | Some(VmValue::Nil) => return Ok(None),
584        Some(v) => v,
585    };
586    let list = match value {
587        VmValue::List(items) => items,
588        other => {
589            return Err(HostlibError::InvalidParameter {
590                builtin,
591                param: "changed_paths",
592                message: format!("expected list of strings, got {}", other.type_name()),
593            });
594        }
595    };
596    let mut out = Vec::with_capacity(list.len());
597    for item in list.iter() {
598        match item {
599            VmValue::String(s) => out.push(s.to_string()),
600            other => {
601                return Err(HostlibError::InvalidParameter {
602                    builtin,
603                    param: "changed_paths",
604                    message: format!("non-string entry: {}", other.type_name()),
605                });
606            }
607        }
608    }
609    Ok(Some(out))
610}
611
612fn scan_result_to_value(result: &ScanResult, delta: Option<&ScanDelta>) -> VmValue {
613    let mut entries: Vec<(&'static str, VmValue)> = vec![
614        ("snapshot_token", str_value(&result.snapshot_token)),
615        ("truncated", VmValue::Bool(result.truncated)),
616        ("project", project_to_value(&result.project)),
617        ("folders", list_of(&result.folders, folder_to_value)),
618        ("files", list_of(&result.files, file_to_value)),
619        ("symbols", list_of(&result.symbols, symbol_to_value)),
620        (
621            "dependencies",
622            list_of(&result.dependencies, dependency_to_value),
623        ),
624        (
625            "sub_projects",
626            list_of(&result.sub_projects, subproject_to_value),
627        ),
628        ("repo_map", str_value(&result.repo_map)),
629    ];
630    if let Some(d) = delta {
631        entries.push(("delta", delta_to_value(d)));
632    }
633    build_dict(entries)
634}
635
636fn list_of<T>(items: &[T], to_value: fn(&T) -> VmValue) -> VmValue {
637    let list: Vec<VmValue> = items.iter().map(to_value).collect();
638    VmValue::List(Arc::new(list))
639}
640
641fn project_to_value(project: &ProjectMetadata) -> VmValue {
642    let test_commands_entries: Vec<(String, VmValue)> = project
643        .test_commands
644        .iter()
645        .map(|(k, v)| (k.clone(), str_value(v)))
646        .collect();
647    let test_commands_dict = build_dict(test_commands_entries);
648
649    let detected: VmValue = project
650        .detected_test_command
651        .as_deref()
652        .map(str_value)
653        .unwrap_or(VmValue::Nil);
654
655    let code_patterns: Vec<VmValue> = project.code_patterns.iter().map(str_value).collect();
656
657    build_dict([
658        ("name", str_value(&project.name)),
659        ("root_path", str_value(&project.root_path)),
660        ("languages", list_of(&project.languages, language_to_value)),
661        ("test_commands", test_commands_dict),
662        ("detected_test_command", detected),
663        ("code_patterns", VmValue::List(Arc::new(code_patterns))),
664        ("total_files", VmValue::Int(project.total_files as i64)),
665        ("total_lines", VmValue::Int(project.total_lines as i64)),
666        ("last_scanned_at", str_value(&project.last_scanned_at)),
667    ])
668}
669
670fn language_to_value(stat: &LanguageStat) -> VmValue {
671    build_dict([
672        ("name", str_value(&stat.name)),
673        ("file_count", VmValue::Int(stat.file_count as i64)),
674        ("line_count", VmValue::Int(stat.line_count as i64)),
675        ("percentage", VmValue::Float(stat.percentage)),
676    ])
677}
678
679fn folder_to_value(folder: &FolderRecord) -> VmValue {
680    let names: Vec<VmValue> = folder.key_symbol_names.iter().map(str_value).collect();
681    build_dict([
682        ("id", str_value(&folder.id)),
683        ("relative_path", str_value(&folder.relative_path)),
684        ("file_count", VmValue::Int(folder.file_count as i64)),
685        ("line_count", VmValue::Int(folder.line_count as i64)),
686        ("dominant_language", str_value(&folder.dominant_language)),
687        ("key_symbol_names", VmValue::List(Arc::new(names))),
688    ])
689}
690
691fn file_to_value(file: &FileRecord) -> VmValue {
692    let imports: Vec<VmValue> = file.imports.iter().map(str_value).collect();
693    let test_pair = file
694        .corresponding_test_file
695        .as_deref()
696        .map(str_value)
697        .unwrap_or(VmValue::Nil);
698    build_dict([
699        ("id", str_value(&file.id)),
700        ("relative_path", str_value(&file.relative_path)),
701        ("file_name", str_value(&file.file_name)),
702        ("language", str_value(&file.language)),
703        ("line_count", VmValue::Int(file.line_count as i64)),
704        ("size_bytes", VmValue::Int(file.size_bytes as i64)),
705        (
706            "last_modified_unix_ms",
707            VmValue::Int(file.last_modified_unix_ms),
708        ),
709        ("imports", VmValue::List(Arc::new(imports))),
710        ("churn_score", VmValue::Float(file.churn_score)),
711        ("corresponding_test_file", test_pair),
712    ])
713}
714
715fn symbol_to_value(symbol: &SymbolRecord) -> VmValue {
716    let container = symbol
717        .container
718        .as_deref()
719        .map(str_value)
720        .unwrap_or(VmValue::Nil);
721    build_dict([
722        ("id", str_value(&symbol.id)),
723        ("name", str_value(&symbol.name)),
724        ("kind", str_value(symbol.kind.keyword())),
725        ("file_path", str_value(&symbol.file_path)),
726        ("line", VmValue::Int(symbol.line as i64)),
727        ("signature", str_value(&symbol.signature)),
728        ("container", container),
729        (
730            "reference_count",
731            VmValue::Int(symbol.reference_count as i64),
732        ),
733        ("importance_score", VmValue::Float(symbol.importance_score)),
734    ])
735}
736
737fn dependency_to_value(dep: &DependencyEdge) -> VmValue {
738    build_dict([
739        ("from_file", str_value(&dep.from_file)),
740        ("to_module", str_value(&dep.to_module)),
741    ])
742}
743
744fn subproject_to_value(sp: &SubProject) -> VmValue {
745    build_dict([
746        ("path", str_value(&sp.path)),
747        ("name", str_value(&sp.name)),
748        ("language", str_value(&sp.language)),
749        ("project_marker", str_value(&sp.project_marker)),
750    ])
751}
752
753fn delta_to_value(delta: &ScanDelta) -> VmValue {
754    let added: Vec<VmValue> = delta.added.iter().map(str_value).collect();
755    let modified: Vec<VmValue> = delta.modified.iter().map(str_value).collect();
756    let removed: Vec<VmValue> = delta.removed.iter().map(str_value).collect();
757    build_dict([
758        ("added", VmValue::List(Arc::new(added))),
759        ("modified", VmValue::List(Arc::new(modified))),
760        ("removed", VmValue::List(Arc::new(removed))),
761        ("full_rescan", VmValue::Bool(delta.full_rescan)),
762    ])
763}
764
#[cfg(test)]
mod tests {
    use super::*;

    /// With an empty request dict, git history is on by default for
    /// `scan_project` and off by default for `scan_incremental`.
    #[test]
    fn builtin_option_defaults_match_request_schemas() {
        let empty_request = std::collections::BTreeMap::new();
        let history_default = |builtin: &'static str| {
            parse_options(builtin, &empty_request)
                .unwrap()
                .include_git_history
        };

        let project_default = history_default(SCAN_PROJECT_BUILTIN);
        let incremental_default = history_default(SCAN_INCREMENTAL_BUILTIN);

        assert!(project_default);
        assert!(!incremental_default);
    }
}