Skip to main content

harn_hostlib/scanner/
mod.rs

1//! Repo scanner host capability.
2//!
3//! Ports `Sources/BurinCore/Scanner/CoreRepoScanner.swift` from
4//! `burin-labs/burin-code` into Rust: deterministic project-wide file
5//! enumeration honoring `.gitignore` and the [`extensions::EXCLUDED_DIRS`]
6//! table, symbol extraction, import-derived dependency graph,
7//! reference + churn + importance scoring, source/test pairing, folder
8//! aggregates, project metadata (language stats + detected test
9//! commands + code-pattern hints), sub-project detection, and a
10//! token-budgeted text repo map.
11//!
12//! `scan_project` returns the full [`result::ScanResult`] alongside an
13//! opaque `snapshot_token` derived from the canonicalized root path. The
14//! result is persisted to `<root>/.harn/hostlib/scanner-snapshot.json` so
15//! that `scan_incremental` can diff against it later — without forcing the
16//! caller to pass the previous result back over the wire.
17
18use std::path::{Path, PathBuf};
19use std::rc::Rc;
20use std::sync::Arc;
21use std::time::{SystemTime, UNIX_EPOCH};
22
23use harn_vm::VmValue;
24
25use crate::error::HostlibError;
26use crate::registry::{BuiltinRegistry, HostlibCapability, RegisteredBuiltin, SyncHandler};
27use crate::tools::args::{
28    build_dict, dict_arg, optional_bool, optional_int, require_string, str_value,
29};
30
31mod commands;
32mod discover;
33mod extensions;
34mod folders;
35mod imports;
36mod result;
37mod scoring;
38mod snapshot;
39mod subproject;
40mod symbols;
41mod test_mapping;
42
43pub use result::{
44    DependencyEdge, FileRecord, FolderRecord, LanguageStat, ProjectMetadata, ScanDelta, ScanResult,
45    SubProject, SymbolKind, SymbolRecord,
46};
47
48const SCAN_PROJECT_BUILTIN: &str = "hostlib_scanner_scan_project";
49const SCAN_INCREMENTAL_BUILTIN: &str = "hostlib_scanner_scan_incremental";
50
/// Zero-sized handle for the repo scanner host capability.
///
/// It carries no state; its [`HostlibCapability`] implementation is what
/// registers the scanner builtins.
#[derive(Default)]
pub struct ScannerCapability;
54
55impl HostlibCapability for ScannerCapability {
56    fn module_name(&self) -> &'static str {
57        "scanner"
58    }
59
60    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
61        let scan_project: SyncHandler = Arc::new(scan_project_handler);
62        registry.register(RegisteredBuiltin {
63            name: SCAN_PROJECT_BUILTIN,
64            module: "scanner",
65            method: "scan_project",
66            handler: scan_project,
67        });
68        let scan_incremental: SyncHandler = Arc::new(scan_incremental_handler);
69        registry.register(RegisteredBuiltin {
70            name: SCAN_INCREMENTAL_BUILTIN,
71            module: "scanner",
72            method: "scan_incremental",
73            handler: scan_incremental,
74        });
75    }
76}
77
78// MARK: - Public Rust API (used by tests + by harn-cli embedders).
79
/// Options controlling a scan; accepted by both [`scan_project`] and
/// [`scan_incremental`].
#[derive(Clone, Debug)]
pub struct ScanProjectOptions {
    /// When `true`, hidden (`.`-prefixed) entries are walked as well.
    pub include_hidden: bool,
    /// When `true`, paths matched by `.gitignore` are left out.
    pub respect_gitignore: bool,
    /// Upper bound on the number of files kept after discovery; `0` means
    /// no limit.
    pub max_files: usize,
    /// When `true`, churn scores are computed by running `git log`.
    pub include_git_history: bool,
    /// Approximate token budget for the text repo map.
    pub repo_map_token_budget: usize,
}
94
95impl Default for ScanProjectOptions {
96    fn default() -> Self {
97        Self {
98            include_hidden: false,
99            respect_gitignore: true,
100            max_files: 0,
101            include_git_history: true,
102            repo_map_token_budget: 1200,
103        }
104    }
105}
106
107/// Run a full scan of `root`, persist a snapshot, and return the result.
108pub fn scan_project(root: &Path, opts: ScanProjectOptions) -> ScanResult {
109    let canonical = canonicalize(root);
110    let discover_opts = discover::DiscoverOptions {
111        include_hidden: opts.include_hidden,
112        respect_gitignore: opts.respect_gitignore,
113    };
114    let mut discovered = discover::discover_files(&canonical, discover_opts);
115    let truncated = if opts.max_files > 0 && discovered.len() > opts.max_files {
116        discovered.truncate(opts.max_files);
117        true
118    } else {
119        false
120    };
121
122    let (mut files, mut symbols, mut dependencies) = extract_per_file(&discovered);
123
124    scoring::compute_reference_counts(&mut symbols, &files);
125
126    if opts.include_git_history {
127        let churn = scoring::compute_churn_scores(&canonical);
128        scoring::apply_churn(&mut files, &churn);
129    }
130    scoring::compute_importance_scores(&mut symbols, &files);
131
132    test_mapping::map_test_files(&mut files);
133
134    let folder_records = folders::build_folder_records(&files, &symbols);
135    let test_commands = commands::detect_test_commands(&canonical);
136    let code_patterns = commands::detect_code_patterns(&files, &canonical);
137    let project = folders::build_project_metadata(
138        &canonical,
139        &files,
140        test_commands,
141        code_patterns,
142        now_iso8601(),
143    );
144    let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
145    let sub_projects = subproject::detect_subprojects(&canonical, 2);
146
147    sort_for_output(&mut files, &mut symbols, &mut dependencies);
148
149    let token = snapshot::root_to_token(&canonical);
150    let result = ScanResult {
151        snapshot_token: token,
152        truncated,
153        project,
154        folders: folder_records,
155        files,
156        symbols,
157        dependencies,
158        sub_projects,
159        repo_map,
160    };
161    snapshot::save(&canonical, &result);
162    result
163}
164
165/// Result returned by [`scan_incremental`].
166#[derive(Clone, Debug)]
167pub struct IncrementalScan {
168    /// Refreshed scan result.
169    pub result: ScanResult,
170    /// Path delta computed against the snapshot.
171    pub delta: ScanDelta,
172}
173
174/// Refresh the snapshot named by `token`. If the snapshot is missing, the
175/// diff is too large (>30%), or `changed_paths` is empty after `>30%` of
176/// the workspace mtime-mismatched, falls back to a full rescan.
177pub fn scan_incremental(
178    token: &str,
179    explicit_changed: Option<&[String]>,
180    opts: ScanProjectOptions,
181) -> IncrementalScan {
182    let root = snapshot::token_to_root(token);
183    let canonical = canonicalize(&root);
184
185    let cached = snapshot::load(&canonical);
186    let cached = match cached {
187        Some(c) => c,
188        None => {
189            let result = scan_project(&canonical, opts);
190            return IncrementalScan {
191                result,
192                delta: ScanDelta {
193                    full_rescan: true,
194                    ..ScanDelta::default()
195                },
196            };
197        }
198    };
199
200    let discover_opts = discover::DiscoverOptions {
201        include_hidden: opts.include_hidden,
202        respect_gitignore: opts.respect_gitignore,
203    };
204    let mut current = discover::discover_files(&canonical, discover_opts);
205    if opts.max_files > 0 && current.len() > opts.max_files {
206        current.truncate(opts.max_files);
207    }
208
209    let delta = compute_delta(&current, &cached, explicit_changed);
210    let total = current.len();
211    let needs_full_rescan =
212        total > 0 && (delta.added.len() + delta.modified.len()) * 10 > total * 3;
213
214    if needs_full_rescan {
215        let result = scan_project(&canonical, opts);
216        return IncrementalScan {
217            result,
218            delta: ScanDelta {
219                full_rescan: true,
220                ..delta
221            },
222        };
223    }
224
225    if delta.added.is_empty() && delta.modified.is_empty() && delta.removed.is_empty() {
226        return IncrementalScan {
227            result: cached,
228            delta,
229        };
230    }
231
232    // Incremental path: rebuild only the touched files, then re-finalize.
233    let mut files = cached.files;
234    let mut symbols = cached.symbols;
235    let mut dependencies = cached.dependencies;
236
237    let removed_set: std::collections::HashSet<&str> =
238        delta.removed.iter().map(|s| s.as_str()).collect();
239    let touched_set: std::collections::HashSet<&str> = delta
240        .added
241        .iter()
242        .chain(delta.modified.iter())
243        .map(|s| s.as_str())
244        .collect();
245
246    files.retain(|f| !removed_set.contains(f.relative_path.as_str()));
247    symbols.retain(|s| {
248        !removed_set.contains(s.file_path.as_str()) && !touched_set.contains(s.file_path.as_str())
249    });
250    dependencies.retain(|d| {
251        !removed_set.contains(d.from_file.as_str()) && !touched_set.contains(d.from_file.as_str())
252    });
253
254    let touched_entries: Vec<discover::DiscoveredFile> = current
255        .iter()
256        .filter(|e| touched_set.contains(e.relative_path.as_str()))
257        .cloned()
258        .collect();
259    let (new_files, new_symbols, new_deps) = extract_per_file(&touched_entries);
260
261    let mut by_path: std::collections::BTreeMap<String, FileRecord> = files
262        .into_iter()
263        .map(|f| (f.relative_path.clone(), f))
264        .collect();
265    for new_file in new_files {
266        by_path.insert(new_file.relative_path.clone(), new_file);
267    }
268    let mut files: Vec<FileRecord> = by_path.into_values().collect();
269    symbols.extend(new_symbols);
270    dependencies.extend(new_deps);
271
272    scoring::compute_reference_counts(&mut symbols, &files);
273    if opts.include_git_history {
274        let churn = scoring::compute_churn_scores(&canonical);
275        scoring::apply_churn(&mut files, &churn);
276    }
277    scoring::compute_importance_scores(&mut symbols, &files);
278    test_mapping::map_test_files(&mut files);
279
280    let folder_records = folders::build_folder_records(&files, &symbols);
281    let test_commands = commands::detect_test_commands(&canonical);
282    let code_patterns = commands::detect_code_patterns(&files, &canonical);
283    let project = folders::build_project_metadata(
284        &canonical,
285        &files,
286        test_commands,
287        code_patterns,
288        now_iso8601(),
289    );
290    let repo_map = folders::build_repo_map(&symbols, &files, opts.repo_map_token_budget);
291    let sub_projects = subproject::detect_subprojects(&canonical, 2);
292
293    sort_for_output(&mut files, &mut symbols, &mut dependencies);
294
295    let token = snapshot::root_to_token(&canonical);
296    let result = ScanResult {
297        snapshot_token: token,
298        truncated: cached.truncated,
299        project,
300        folders: folder_records,
301        files,
302        symbols,
303        dependencies,
304        sub_projects,
305        repo_map,
306    };
307    snapshot::save(&canonical, &result);
308    IncrementalScan { result, delta }
309}
310
311// MARK: - Internals
312
/// Resolves `root` via [`std::fs::canonicalize`]; if that fails (for
/// example because the path does not exist) the input path is returned
/// unchanged as an owned `PathBuf`.
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => PathBuf::from(root),
    }
}
316
317fn extract_per_file(
318    discovered: &[discover::DiscoveredFile],
319) -> (Vec<FileRecord>, Vec<SymbolRecord>, Vec<DependencyEdge>) {
320    let mut files: Vec<FileRecord> = Vec::with_capacity(discovered.len());
321    let mut symbols: Vec<SymbolRecord> = Vec::new();
322    let mut dependencies: Vec<DependencyEdge> = Vec::new();
323
324    for entry in discovered {
325        let metadata = std::fs::metadata(&entry.absolute_path);
326        let size = metadata.as_ref().map(|m| m.len()).unwrap_or(0);
327        let modified = metadata
328            .as_ref()
329            .ok()
330            .and_then(|m| m.modified().ok())
331            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
332            .map(|d| d.as_millis() as i64)
333            .unwrap_or(0);
334
335        let content = std::fs::read_to_string(&entry.absolute_path).unwrap_or_default();
336        if content.is_empty() && size != 0 {
337            // Likely a non-utf8 binary; skip symbol/import extraction but still record the file.
338        }
339        let language = extensions::file_extension(&entry.relative_path);
340        let imports = imports::extract_imports(&content, &language);
341        let file_symbols = symbols::extract_symbols(&content, &language, &entry.relative_path);
342        let line_count = count_lines(&content);
343
344        for imp in &imports {
345            dependencies.push(DependencyEdge {
346                from_file: entry.relative_path.clone(),
347                to_module: imp.clone(),
348            });
349        }
350        symbols.extend(file_symbols);
351
352        files.push(FileRecord {
353            id: entry.relative_path.clone(),
354            relative_path: entry.relative_path.clone(),
355            file_name: extensions::file_name(&entry.relative_path).to_string(),
356            language,
357            line_count,
358            size_bytes: size,
359            last_modified_unix_ms: modified,
360            imports,
361            churn_score: 0.0,
362            corresponding_test_file: None,
363        });
364    }
365
366    (files, symbols, dependencies)
367}
368
/// Counts the lines in `content`: the number of `\n` bytes, plus one when
/// the text does not end with a newline. Empty input has zero lines.
fn count_lines(content: &str) -> usize {
    let bytes = content.as_bytes();
    let newlines = bytes.iter().filter(|&&byte| byte == b'\n').count();
    match bytes.last() {
        None => 0,
        Some(b'\n') => newlines,
        // A final line without a terminating newline still counts.
        Some(_) => newlines + 1,
    }
}
377
378fn sort_for_output(
379    files: &mut [FileRecord],
380    symbols: &mut [SymbolRecord],
381    dependencies: &mut [DependencyEdge],
382) {
383    files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
384    symbols.sort_by(|a, b| a.id.cmp(&b.id));
385    dependencies.sort_by(|a, b| {
386        a.from_file
387            .cmp(&b.from_file)
388            .then_with(|| a.to_module.cmp(&b.to_module))
389    });
390}
391
392fn compute_delta(
393    current: &[discover::DiscoveredFile],
394    cached: &ScanResult,
395    explicit_changed: Option<&[String]>,
396) -> ScanDelta {
397    let cached_files: std::collections::BTreeMap<&str, &FileRecord> = cached
398        .files
399        .iter()
400        .map(|f| (f.relative_path.as_str(), f))
401        .collect();
402    let current_paths: std::collections::HashSet<&str> =
403        current.iter().map(|e| e.relative_path.as_str()).collect();
404
405    let added: Vec<String> = current
406        .iter()
407        .filter(|e| !cached_files.contains_key(e.relative_path.as_str()))
408        .map(|e| e.relative_path.clone())
409        .collect();
410    let removed: Vec<String> = cached
411        .files
412        .iter()
413        .filter(|f| !current_paths.contains(f.relative_path.as_str()))
414        .map(|f| f.relative_path.clone())
415        .collect();
416
417    let modified: Vec<String> = if let Some(explicit) = explicit_changed {
418        explicit
419            .iter()
420            .filter(|p| cached_files.contains_key(p.as_str()) && current_paths.contains(p.as_str()))
421            .cloned()
422            .collect()
423    } else {
424        let mut out = Vec::new();
425        for entry in current {
426            if let Some(prev) = cached_files.get(entry.relative_path.as_str()) {
427                let mtime = std::fs::metadata(&entry.absolute_path)
428                    .ok()
429                    .and_then(|m| m.modified().ok())
430                    .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
431                    .map(|d| d.as_millis() as i64)
432                    .unwrap_or(0);
433                if mtime > prev.last_modified_unix_ms {
434                    out.push(entry.relative_path.clone());
435                }
436            }
437        }
438        out
439    };
440
441    ScanDelta {
442        added,
443        modified,
444        removed,
445        full_rescan: false,
446    }
447}
448
449fn now_iso8601() -> String {
450    let now = SystemTime::now()
451        .duration_since(UNIX_EPOCH)
452        .unwrap_or_default();
453    let secs = now.as_secs() as i64;
454    let nanos = now.subsec_nanos();
455    let (year, month, day, hour, minute, second) = unix_to_civil(secs);
456    format!(
457        "{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}.{millis:03}Z",
458        millis = nanos / 1_000_000
459    )
460}
461
/// Splits a Unix timestamp (seconds, UTC) into
/// `(year, month, day, hour, minute, second)`.
///
/// The date part follows Howard Hinnant's days-to-civil algorithm, which
/// avoids pulling in `chrono` for a single formatter. Euclidean division
/// keeps timestamps before 1970 correct.
fn unix_to_civil(secs: i64) -> (i64, u32, u32, u32, u32, u32) {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle

    // Time of day.
    let days_since_epoch = secs.div_euclid(SECS_PER_DAY);
    let secs_of_day = secs.rem_euclid(SECS_PER_DAY);
    let hour = (secs_of_day / 3600) as u32;
    let within_hour = secs_of_day % 3600;
    let minute = (within_hour / 60) as u32;
    let second = (within_hour % 60) as u32;

    // Shift the day count so that eras begin on 0000-03-01.
    let shifted = days_since_epoch + 719_468;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Months are counted from March (index 0) through February (index 11).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * month_index + 2) / 5 + 1) as u32;
    let month = (if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    }) as u32;

    // January and February belong to the following civil year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = march_based_year + i64::from(month <= 2);

    (year, month, day, hour, minute, second)
}
485
486// MARK: - Builtin handlers (Harn dict ↔ Rust struct).
487
488fn scan_project_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
489    let raw = dict_arg(SCAN_PROJECT_BUILTIN, args)?;
490    let dict = raw.as_ref();
491    let root = require_string(SCAN_PROJECT_BUILTIN, dict, "root")?;
492    let opts = parse_options(SCAN_PROJECT_BUILTIN, dict)?;
493    let result = scan_project(Path::new(&root), opts);
494    Ok(scan_result_to_value(&result, None))
495}
496
497fn scan_incremental_handler(args: &[VmValue]) -> Result<VmValue, HostlibError> {
498    let raw = dict_arg(SCAN_INCREMENTAL_BUILTIN, args)?;
499    let dict = raw.as_ref();
500    let token = require_string(SCAN_INCREMENTAL_BUILTIN, dict, "snapshot_token")?;
501    let opts = parse_options(SCAN_INCREMENTAL_BUILTIN, dict)?;
502    let changed = parse_changed_paths(SCAN_INCREMENTAL_BUILTIN, dict)?;
503    let scan = scan_incremental(&token, changed.as_deref(), opts);
504    Ok(scan_result_to_value(&scan.result, Some(&scan.delta)))
505}
506
507fn parse_options(
508    builtin: &'static str,
509    dict: &std::collections::BTreeMap<String, VmValue>,
510) -> Result<ScanProjectOptions, HostlibError> {
511    let include_hidden = optional_bool(builtin, dict, "include_hidden", false)?;
512    let respect_gitignore = optional_bool(builtin, dict, "respect_gitignore", true)?;
513    let max_files = optional_int(builtin, dict, "max_files", 0)?;
514    let include_git_history = optional_bool(builtin, dict, "include_git_history", true)?;
515    let repo_map_token_budget = optional_int(builtin, dict, "repo_map_token_budget", 1200)?;
516    if max_files < 0 {
517        return Err(HostlibError::InvalidParameter {
518            builtin,
519            param: "max_files",
520            message: "must be >= 0".to_string(),
521        });
522    }
523    if repo_map_token_budget < 0 {
524        return Err(HostlibError::InvalidParameter {
525            builtin,
526            param: "repo_map_token_budget",
527            message: "must be >= 0".to_string(),
528        });
529    }
530    Ok(ScanProjectOptions {
531        include_hidden,
532        respect_gitignore,
533        max_files: max_files as usize,
534        include_git_history,
535        repo_map_token_budget: repo_map_token_budget as usize,
536    })
537}
538
539fn parse_changed_paths(
540    builtin: &'static str,
541    dict: &std::collections::BTreeMap<String, VmValue>,
542) -> Result<Option<Vec<String>>, HostlibError> {
543    let value = match dict.get("changed_paths") {
544        None | Some(VmValue::Nil) => return Ok(None),
545        Some(v) => v,
546    };
547    let list = match value {
548        VmValue::List(items) => items,
549        other => {
550            return Err(HostlibError::InvalidParameter {
551                builtin,
552                param: "changed_paths",
553                message: format!("expected list of strings, got {}", other.type_name()),
554            });
555        }
556    };
557    let mut out = Vec::with_capacity(list.len());
558    for item in list.iter() {
559        match item {
560            VmValue::String(s) => out.push(s.to_string()),
561            other => {
562                return Err(HostlibError::InvalidParameter {
563                    builtin,
564                    param: "changed_paths",
565                    message: format!("non-string entry: {}", other.type_name()),
566                });
567            }
568        }
569    }
570    Ok(Some(out))
571}
572
573fn scan_result_to_value(result: &ScanResult, delta: Option<&ScanDelta>) -> VmValue {
574    let mut entries: Vec<(&'static str, VmValue)> = vec![
575        ("snapshot_token", str_value(&result.snapshot_token)),
576        ("truncated", VmValue::Bool(result.truncated)),
577        ("project", project_to_value(&result.project)),
578        ("folders", list_of(&result.folders, folder_to_value)),
579        ("files", list_of(&result.files, file_to_value)),
580        ("symbols", list_of(&result.symbols, symbol_to_value)),
581        (
582            "dependencies",
583            list_of(&result.dependencies, dependency_to_value),
584        ),
585        (
586            "sub_projects",
587            list_of(&result.sub_projects, subproject_to_value),
588        ),
589        ("repo_map", str_value(&result.repo_map)),
590    ];
591    if let Some(d) = delta {
592        entries.push(("delta", delta_to_value(d)));
593    }
594    build_dict(entries)
595}
596
597fn list_of<T>(items: &[T], to_value: fn(&T) -> VmValue) -> VmValue {
598    let list: Vec<VmValue> = items.iter().map(to_value).collect();
599    VmValue::List(Rc::new(list))
600}
601
602fn project_to_value(project: &ProjectMetadata) -> VmValue {
603    let test_commands_entries: Vec<(String, VmValue)> = project
604        .test_commands
605        .iter()
606        .map(|(k, v)| (k.clone(), str_value(v)))
607        .collect();
608    let test_commands_dict = build_dict(test_commands_entries);
609
610    let detected: VmValue = project
611        .detected_test_command
612        .as_deref()
613        .map(str_value)
614        .unwrap_or(VmValue::Nil);
615
616    let code_patterns: Vec<VmValue> = project.code_patterns.iter().map(str_value).collect();
617
618    build_dict([
619        ("name", str_value(&project.name)),
620        ("root_path", str_value(&project.root_path)),
621        ("languages", list_of(&project.languages, language_to_value)),
622        ("test_commands", test_commands_dict),
623        ("detected_test_command", detected),
624        ("code_patterns", VmValue::List(Rc::new(code_patterns))),
625        ("total_files", VmValue::Int(project.total_files as i64)),
626        ("total_lines", VmValue::Int(project.total_lines as i64)),
627        ("last_scanned_at", str_value(&project.last_scanned_at)),
628    ])
629}
630
631fn language_to_value(stat: &LanguageStat) -> VmValue {
632    build_dict([
633        ("name", str_value(&stat.name)),
634        ("file_count", VmValue::Int(stat.file_count as i64)),
635        ("line_count", VmValue::Int(stat.line_count as i64)),
636        ("percentage", VmValue::Float(stat.percentage)),
637    ])
638}
639
640fn folder_to_value(folder: &FolderRecord) -> VmValue {
641    let names: Vec<VmValue> = folder.key_symbol_names.iter().map(str_value).collect();
642    build_dict([
643        ("id", str_value(&folder.id)),
644        ("relative_path", str_value(&folder.relative_path)),
645        ("file_count", VmValue::Int(folder.file_count as i64)),
646        ("line_count", VmValue::Int(folder.line_count as i64)),
647        ("dominant_language", str_value(&folder.dominant_language)),
648        ("key_symbol_names", VmValue::List(Rc::new(names))),
649    ])
650}
651
652fn file_to_value(file: &FileRecord) -> VmValue {
653    let imports: Vec<VmValue> = file.imports.iter().map(str_value).collect();
654    let test_pair = file
655        .corresponding_test_file
656        .as_deref()
657        .map(str_value)
658        .unwrap_or(VmValue::Nil);
659    build_dict([
660        ("id", str_value(&file.id)),
661        ("relative_path", str_value(&file.relative_path)),
662        ("file_name", str_value(&file.file_name)),
663        ("language", str_value(&file.language)),
664        ("line_count", VmValue::Int(file.line_count as i64)),
665        ("size_bytes", VmValue::Int(file.size_bytes as i64)),
666        (
667            "last_modified_unix_ms",
668            VmValue::Int(file.last_modified_unix_ms),
669        ),
670        ("imports", VmValue::List(Rc::new(imports))),
671        ("churn_score", VmValue::Float(file.churn_score)),
672        ("corresponding_test_file", test_pair),
673    ])
674}
675
676fn symbol_to_value(symbol: &SymbolRecord) -> VmValue {
677    let container = symbol
678        .container
679        .as_deref()
680        .map(str_value)
681        .unwrap_or(VmValue::Nil);
682    build_dict([
683        ("id", str_value(&symbol.id)),
684        ("name", str_value(&symbol.name)),
685        ("kind", str_value(symbol.kind.keyword())),
686        ("file_path", str_value(&symbol.file_path)),
687        ("line", VmValue::Int(symbol.line as i64)),
688        ("signature", str_value(&symbol.signature)),
689        ("container", container),
690        (
691            "reference_count",
692            VmValue::Int(symbol.reference_count as i64),
693        ),
694        ("importance_score", VmValue::Float(symbol.importance_score)),
695    ])
696}
697
698fn dependency_to_value(dep: &DependencyEdge) -> VmValue {
699    build_dict([
700        ("from_file", str_value(&dep.from_file)),
701        ("to_module", str_value(&dep.to_module)),
702    ])
703}
704
705fn subproject_to_value(sp: &SubProject) -> VmValue {
706    build_dict([
707        ("path", str_value(&sp.path)),
708        ("name", str_value(&sp.name)),
709        ("language", str_value(&sp.language)),
710        ("project_marker", str_value(&sp.project_marker)),
711    ])
712}
713
714fn delta_to_value(delta: &ScanDelta) -> VmValue {
715    let added: Vec<VmValue> = delta.added.iter().map(str_value).collect();
716    let modified: Vec<VmValue> = delta.modified.iter().map(str_value).collect();
717    let removed: Vec<VmValue> = delta.removed.iter().map(str_value).collect();
718    build_dict([
719        ("added", VmValue::List(Rc::new(added))),
720        ("modified", VmValue::List(Rc::new(modified))),
721        ("removed", VmValue::List(Rc::new(removed))),
722        ("full_rescan", VmValue::Bool(delta.full_rescan)),
723    ])
724}