Skip to main content

harn_rules_hostlib/
lib.rs

1//! Host capability exposing the `harn-rules` declarative rule engine to
2//! Harn as `rules.search` / `rules.report` / `rules.apply`.
3//!
4//! This crate lives outside `harn-hostlib` on purpose: `harn-rules` already
5//! depends on `harn-hostlib` (for the tree-sitter grammars), so the rules
6//! builtins would form a dependency cycle if they lived there. An embedder
7//! (harn-cli, harn-serve) calls [`install`] alongside `harn_hostlib::install_default`.
8//!
9//! ## Builtins
10//!
11//! - `rules.search` (read-only) — run a rule and return its matches.
12//! - `rules.report` (read-only) — run a rule in report-only mode and return
13//!   a [`harn_rules::DataTable`] (counts + per-match rows).
14//! - `rules.diagnostics` (read-only) — run a **declarative** rule and return
15//!   its [`harn_rules::Diagnostic`]s (message + severity + span + fix).
16//! - `rules.visit` (read-only, **async**) — the **imperative** escape hatch:
17//!   run a rule's matcher, then invoke a `.harn` visitor
18//!   `on_match($node, $ctx)` once per match. The visitor returns its
19//!   report(s) — `nil`/`false` to skip, a `{message, fix, safety}` dict, or
20//!   a list of them — which the engine turns into diagnostics of the same
21//!   shape `rules.diagnostics` emits. The visitor has full programmatic
22//!   control (compute a message/fix from the captured metavars), which the
23//!   declarative form cannot.
24//! - `rules.apply` (write-gated) — apply a codemod rule's `fix`; writes only
25//!   when `dry_run: false` *and* the rule is safe to auto-apply (or
26//!   `allow_unsafe: true`). Shares the deterministic-tools gate with the
27//!   other mutating builtins.
28//!
29//! A rule is passed as its TOML source (`rule`), so an agent can author and
30//! run a rule — declarative *or* imperative — entirely from `.harn` without
31//! recompiling the binary.
32//!
33//! ### Why `rules.visit` is async, and why it returns rather than mutates
34//!
35//! A *synchronous* hostlib builtin cannot call a `.harn` closure: the VM's
36//! [`Vm::call_closure_pub`] is async-only. So the visitor is registered as an
37//! **async** builtin (directly on the VM via [`Vm::register_async_builtin`],
38//! bypassing the sync [`HostlibRegistry`]), which can obtain a child VM from
39//! its [`AsyncBuiltinCtx`] and call back per match.
40//!
41//! The visitor **returns** its reports instead of calling a mutating
42//! `ctx.report(...)`: Harn closures capture by value, so a Harn-side
43//! accumulator could not collect across calls, and `VmValue` has no callable
44//! variant that carries captured Rust state to embed a stateful `report`
45//! method in `ctx`. Returning is both the sound option and the simpler one.
46
47use std::collections::BTreeMap;
48use std::path::{Path, PathBuf};
49use std::sync::Arc;
50
51use harn_hostlib::ast::Language;
52use harn_hostlib::tools::permissions::gated_handler;
53use harn_hostlib::{
54    BuiltinRegistry, HostlibCapability, HostlibError, HostlibRegistry, RegisteredBuiltin,
55};
56use harn_vm::{AsyncBuiltinCtx, Vm, VmError, VmValue};
57
58use harn_rules::{
59    data_table, Applicability, BindingMetadata, CompiledRule, Diagnostic, ResolvedBinding, Rule,
60    RuleMatch, Safety, Severity, SourceFile, Span,
61};
62
// VM-level names of the builtins this crate provides. Each constant is used
// both as the registered builtin name and as the `builtin` tag carried by the
// errors that builtin raises.
const SEARCH: &str = "hostlib_rules_search";
const REPORT: &str = "hostlib_rules_report";
const DIAGNOSTICS: &str = "hostlib_rules_diagnostics";
// Registered as an async builtin directly on the VM (see `install`).
const VISIT: &str = "hostlib_rules_visit";
// The two write-gated builtins.
const APPLY: &str = "hostlib_rules_apply";
const FOLD: &str = "hostlib_rules_fold";
// The single builtin of the `lint` module.
const LINT_RUN: &str = "hostlib_lint_run";
70
71/// The `rules` host capability.
72#[derive(Default)]
73pub struct RulesCapability;
74
75impl HostlibCapability for RulesCapability {
76    fn module_name(&self) -> &'static str {
77        "rules"
78    }
79
80    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
81        registry.register(RegisteredBuiltin {
82            name: SEARCH,
83            module: "rules",
84            method: "search",
85            handler: Arc::new(search_run),
86        });
87        registry.register(RegisteredBuiltin {
88            name: REPORT,
89            module: "rules",
90            method: "report",
91            handler: Arc::new(report_run),
92        });
93        registry.register(RegisteredBuiltin {
94            name: DIAGNOSTICS,
95            module: "rules",
96            method: "diagnostics",
97            handler: Arc::new(diagnostics_run),
98        });
99        // `apply` writes files, so it shares the deterministic-tools gate.
100        registry.register(RegisteredBuiltin {
101            name: APPLY,
102            module: "rules",
103            method: "apply",
104            handler: gated_handler(APPLY, apply_run),
105        });
106        // `fold` also writes; same gate.
107        registry.register(RegisteredBuiltin {
108            name: FOLD,
109            module: "rules",
110            method: "fold",
111            handler: gated_handler(FOLD, fold_run),
112        });
113    }
114}
115
116/// The `lint` host capability (#2851): runs the Harn linter for an
117/// agent/IDE/cloud caller, returning the same diagnostics the CLI emits.
118#[derive(Default)]
119pub struct LintCapability;
120
121impl HostlibCapability for LintCapability {
122    fn module_name(&self) -> &'static str {
123        "lint"
124    }
125
126    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
127        // Read-only: lint.run parses + lints in memory, never writes.
128        registry.register(RegisteredBuiltin {
129            name: LINT_RUN,
130            module: "lint",
131            method: "run",
132            handler: Arc::new(lint_run),
133        });
134    }
135}
136
137/// Install the `rules` + `lint` capabilities into a VM. Call this alongside
138/// `harn_hostlib::install_default`.
139pub fn install(vm: &mut Vm) {
140    HostlibRegistry::new()
141        .with(RulesCapability)
142        .with(LintCapability)
143        .register_into_vm(vm);
144    // `rules.visit` invokes a `.harn` closure per match, which only an async
145    // builtin can do (`call_closure_pub` is async). It is therefore registered
146    // directly on the VM rather than through the sync `HostlibRegistry`.
147    vm.register_async_builtin(VISIT, visit_run);
148}
149
150// ---------------------------------------------------------------------------
151// Builtins
152// ---------------------------------------------------------------------------
153
154fn search_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
155    let dict = first_dict(SEARCH, args)?;
156    let rule = compile_rule(SEARCH, &dict)?;
157    let files = load_files(SEARCH, &dict)?;
158
159    let mut matches = Vec::new();
160    for file in &files {
161        for m in rule.run(&file.source).map_err(|e| backend(SEARCH, &e))? {
162            matches.push(match_to_vm(&file.path, &m));
163        }
164    }
165    Ok(dict_vm([
166        ("result", str_vm("ok")),
167        ("match_count", VmValue::Int(matches.len() as i64)),
168        ("matches", VmValue::List(Arc::new(matches))),
169    ]))
170}
171
172fn report_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
173    let dict = first_dict(REPORT, args)?;
174    let rule = compile_rule(REPORT, &dict)?;
175    let files = load_files(REPORT, &dict)?;
176    let table = data_table(&rule, &files).map_err(|e| backend(REPORT, &e))?;
177    Ok(json_to_vm(&table.to_json_value()))
178}
179
180fn diagnostics_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
181    let dict = first_dict(DIAGNOSTICS, args)?;
182    let rule = compile_rule(DIAGNOSTICS, &dict)?;
183    let files = load_files(DIAGNOSTICS, &dict)?;
184
185    let mut diagnostics = Vec::new();
186    for file in &files {
187        for d in rule
188            .diagnostics(&file.source)
189            .map_err(|e| backend(DIAGNOSTICS, &e))?
190        {
191            diagnostics.push(diagnostic_vm(&file.path, &d));
192        }
193    }
194    Ok(dict_vm([
195        ("result", str_vm("ok")),
196        ("diagnostic_count", VmValue::Int(diagnostics.len() as i64)),
197        ("diagnostics", VmValue::List(Arc::new(diagnostics))),
198    ]))
199}
200
201/// The imperative escape hatch (#2878): run the rule's matcher, then call the
202/// `.harn` visitor `on_match($node, $ctx)` once per match. The visitor's
203/// return value becomes diagnostics of the same shape `rules.diagnostics`
204/// emits. Read-only — it never writes; the agent applies fixes itself.
205async fn visit_run(ctx: AsyncBuiltinCtx, args: Vec<VmValue>) -> Result<VmValue, VmError> {
206    let dict = first_dict(VISIT, &args).map_err(host_err)?;
207    let rule = compile_rule(VISIT, &dict).map_err(host_err)?;
208    let files = load_files(VISIT, &dict).map_err(host_err)?;
209    let visitor = match dict.get("on_match") {
210        Some(VmValue::Closure(c)) => c.clone(),
211        _ => {
212            return Err(VmError::Runtime(format!(
213                "{VISIT}: `on_match` must be a function `fn(node, ctx)`"
214            )))
215        }
216    };
217
218    let default_severity = rule.severity();
219    let default_safety = rule.safety();
220    let rule_id = rule.id().to_string();
221
222    let mut vm = ctx.child_vm();
223    let mut diagnostics = Vec::new();
224    for file in &files {
225        let matches = rule
226            .run(&file.source)
227            .map_err(|e| host_err(backend(VISIT, &e)))?;
228        let file_ctx = ctx_vm(&file.path, file.language, &file.source, &rule_id);
229        for m in &matches {
230            let node = node_vm(m);
231            let ret = vm
232                .call_closure_pub(&visitor, &[node, file_ctx.clone()])
233                .await?;
234            ctx.forward_output(&vm.take_output());
235            for report in reports_from_return(ret) {
236                diagnostics.push(report_to_diagnostic_vm(
237                    &file.path,
238                    &rule_id,
239                    m.span,
240                    report,
241                    default_severity,
242                    default_safety,
243                ));
244            }
245        }
246    }
247    Ok(dict_vm([
248        ("result", str_vm("ok")),
249        ("diagnostic_count", VmValue::Int(diagnostics.len() as i64)),
250        ("diagnostics", VmValue::List(Arc::new(diagnostics))),
251    ]))
252}
253
254fn apply_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
255    let dict = first_dict(APPLY, args)?;
256    let rule = compile_rule(APPLY, &dict)?;
257    let dry_run = optional_bool(&dict, "dry_run", true);
258    let allow_unsafe = optional_bool(&dict, "allow_unsafe", false);
259    // fmt post-pass (#2847): normalize rewritten `.harn` so a batch lands
260    // fmt-stable. On by default; `format: false` opts out.
261    let format = optional_bool(&dict, "format", true);
262    let files = load_files(APPLY, &dict)?;
263
264    let auto_applicable = rule.safety().is_auto_applicable();
265    let mut entries = Vec::new();
266    for file in &files {
267        let outcome = rule.apply(&file.source).map_err(|e| backend(APPLY, &e))?;
268        // Only `.harn` has a formatter; harn_fmt is idempotent, so a later
269        // `harn fmt` is a no-op. A formatter error falls back to the raw
270        // rewrite rather than failing the codemod.
271        let formatted = format && outcome.changed && file.language == Language::Harn;
272        let rewritten = if formatted {
273            match harn_fmt::format_source(&outcome.rewritten) {
274                Ok(canonical) => canonical,
275                Err(_) => outcome.rewritten,
276            }
277        } else {
278            outcome.rewritten
279        };
280        // Write only on a real apply, when the edit is safe to auto-apply
281        // (or explicitly allowed), and the rule actually changed the file.
282        let applied = !dry_run && outcome.changed && (auto_applicable || allow_unsafe);
283        if applied {
284            std::fs::write(&file.path, &rewritten).map_err(|e| HostlibError::Backend {
285                builtin: APPLY,
286                message: format!("write `{}`: {e}", file.path.display()),
287            })?;
288        }
289        entries.push(dict_vm([
290            ("path", str_vm(file.path.display().to_string())),
291            ("changed", VmValue::Bool(outcome.changed)),
292            ("applied", VmValue::Bool(applied)),
293            ("idempotent", VmValue::Bool(outcome.idempotent)),
294            ("formatted", VmValue::Bool(formatted)),
295            ("safety", str_vm(format!("{:?}", outcome.safety))),
296            // The original source, so callers can render a diff without a
297            // (sandboxed) re-read of the file.
298            ("before", str_vm(&file.source)),
299            ("preview", str_vm(rewritten)),
300        ]));
301    }
302    Ok(dict_vm([
303        ("result", str_vm("ok")),
304        ("dry_run", VmValue::Bool(dry_run)),
305        ("auto_applicable", VmValue::Bool(auto_applicable)),
306        ("files", VmValue::List(Arc::new(entries))),
307    ]))
308}
309
310/// `rules.fold` (#2824): fold consecutive `let x = src?.x ?? d` runs into a
311/// single destructure-with-defaults. A specialized, behavior-preserving
312/// codemod (the engine can't fold statement sequences declaratively). Writes
313/// only on a real apply (`dry_run: false`); shares the deterministic gate.
314fn fold_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
315    let dict = first_dict(FOLD, args)?;
316    let dry_run = optional_bool(&dict, "dry_run", true);
317    let files = load_files(FOLD, &dict)?;
318
319    let mut entries = Vec::new();
320    for file in &files {
321        let raw_folded =
322            harn_rules::fold::fold_destructure_defaults(&file.source, file.language.name())
323                .map_err(|e| backend(FOLD, &e))?;
324        let raw_changed = raw_folded != file.source;
325        let formatted = raw_changed && file.language == Language::Harn;
326        let folded = if formatted {
327            match harn_fmt::format_source(&raw_folded) {
328                Ok(canonical) => canonical,
329                Err(_) => raw_folded,
330            }
331        } else {
332            raw_folded
333        };
334        let changed = folded != file.source;
335        let idempotent = harn_rules::fold::fold_destructure_defaults(&folded, file.language.name())
336            .map(|again| again == folded)
337            .unwrap_or(false);
338        let applied = !dry_run && changed;
339        if applied {
340            std::fs::write(&file.path, &folded).map_err(|e| HostlibError::Backend {
341                builtin: FOLD,
342                message: format!("write `{}`: {e}", file.path.display()),
343            })?;
344        }
345        entries.push(dict_vm([
346            ("path", str_vm(file.path.display().to_string())),
347            ("changed", VmValue::Bool(changed)),
348            ("applied", VmValue::Bool(applied)),
349            ("idempotent", VmValue::Bool(idempotent)),
350            ("formatted", VmValue::Bool(formatted)),
351            ("safety", str_vm("BehaviorPreserving")),
352            ("before", str_vm(&file.source)),
353            ("preview", str_vm(folded)),
354        ]));
355    }
356    Ok(dict_vm([
357        ("result", str_vm("ok")),
358        ("dry_run", VmValue::Bool(dry_run)),
359        ("files", VmValue::List(Arc::new(entries))),
360    ]))
361}
362
363/// `lint.run` (#2851): lint a Harn source string and return its diagnostics, so
364/// an agent / IDE / cloud caller gets the same findings as `harn lint` without
365/// shelling out. Read-only. Params: `{source, disabled?, severity?}` where
366/// `severity` maps a rule id to `"error"` / `"warning"` / `"info"`.
367fn lint_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
368    let dict = first_dict(LINT_RUN, args)?;
369    let source = require_string(LINT_RUN, &dict, "source")?;
370    let disabled = optional_string_list(&dict, "disabled");
371    let severity_overrides = parse_severity_overrides(&dict);
372
373    let program = harn_parser::parse_source(&source).map_err(|e| HostlibError::Backend {
374        builtin: LINT_RUN,
375        message: format!("parse error: {e}"),
376    })?;
377    let options = harn_lint::LintOptions {
378        severity_overrides,
379        ..Default::default()
380    };
381    let diagnostics = harn_lint::lint_with_options(
382        &program,
383        &disabled,
384        Some(&source),
385        &std::collections::HashSet::new(),
386        &options,
387    );
388    let items: Vec<VmValue> = diagnostics.iter().map(lint_diagnostic_vm).collect();
389    Ok(dict_vm([
390        ("result", str_vm("ok")),
391        ("diagnostic_count", VmValue::Int(items.len() as i64)),
392        ("diagnostics", VmValue::List(Arc::new(items))),
393    ]))
394}
395
396/// Parse a `severity` dict param (`{rule: "error"|"warning"|"info"}`) into the
397/// linter's override map. Unknown severities are skipped.
398fn parse_severity_overrides(
399    dict: &harn_vm::value::DictMap,
400) -> std::collections::HashMap<String, harn_lint::LintSeverity> {
401    let mut out = std::collections::HashMap::new();
402    if let Some(VmValue::Dict(map)) = dict.get("severity") {
403        for (rule, value) in map.iter() {
404            if let VmValue::String(s) = value {
405                let severity = match s.to_ascii_lowercase().as_str() {
406                    "error" => Some(harn_lint::LintSeverity::Error),
407                    "warning" | "warn" => Some(harn_lint::LintSeverity::Warning),
408                    "info" => Some(harn_lint::LintSeverity::Info),
409                    _ => None,
410                };
411                if let Some(severity) = severity {
412                    out.insert(rule.clone(), severity);
413                }
414            }
415        }
416    }
417    out
418}
419
420/// Marshal a [`harn_lint::LintDiagnostic`] into a VM dict, mirroring the
421/// fields the CLI renders (code, rule, message, severity, span).
422fn lint_diagnostic_vm(diag: &harn_lint::LintDiagnostic) -> VmValue {
423    let severity = match diag.severity {
424        harn_lint::LintSeverity::Error => "error",
425        harn_lint::LintSeverity::Warning => "warning",
426        harn_lint::LintSeverity::Info => "info",
427    };
428    dict_vm([
429        ("code", str_vm(diag.code.as_str())),
430        ("rule", str_vm(diag.rule.as_ref())),
431        ("message", str_vm(&diag.message)),
432        ("severity", str_vm(severity)),
433        ("start_byte", VmValue::Int(diag.span.start as i64)),
434        ("end_byte", VmValue::Int(diag.span.end as i64)),
435        ("line", VmValue::Int(diag.span.line as i64)),
436        ("column", VmValue::Int(diag.span.column as i64)),
437    ])
438}
439
440// ---------------------------------------------------------------------------
441// Shared parsing / conversion
442// ---------------------------------------------------------------------------
443
444fn compile_rule(
445    builtin: &'static str,
446    dict: &harn_vm::value::DictMap,
447) -> Result<CompiledRule, HostlibError> {
448    let toml = require_string(builtin, dict, "rule")?;
449    let rule = Rule::from_toml_str(&toml).map_err(|e| HostlibError::InvalidParameter {
450        builtin,
451        param: "rule",
452        message: format!("invalid rule TOML: {e}"),
453    })?;
454    CompiledRule::compile(&rule).map_err(|e| HostlibError::InvalidParameter {
455        builtin,
456        param: "rule",
457        message: e.to_string(),
458    })
459}
460
461/// Load the fileset: inline `source` (+ `language`) for a single buffer, or
462/// `paths` read from disk (language inferred per file; non-UTF8 and
463/// undetectable files are skipped).
464fn load_files(
465    builtin: &'static str,
466    dict: &harn_vm::value::DictMap,
467) -> Result<Vec<SourceFile>, HostlibError> {
468    if let Some(source) = optional_string(dict, "source") {
469        let language_name = require_string(builtin, dict, "language")?;
470        let language =
471            Language::from_name(&language_name).ok_or_else(|| HostlibError::InvalidParameter {
472                builtin,
473                param: "language",
474                message: format!("unknown language `{language_name}`"),
475            })?;
476        let path = optional_string(dict, "path").unwrap_or_else(|| "<inline>".to_string());
477        return Ok(vec![SourceFile {
478            path: PathBuf::from(path),
479            language,
480            source,
481        }]);
482    }
483
484    let paths = optional_string_list(dict, "paths");
485    if paths.is_empty() {
486        return Err(HostlibError::MissingParameter {
487            builtin,
488            param: "paths",
489        });
490    }
491    let mut files = Vec::new();
492    for path in paths {
493        let bytes = std::fs::read(&path).map_err(|e| HostlibError::Backend {
494            builtin,
495            message: format!("read `{path}`: {e}"),
496        })?;
497        let Ok(contents) = String::from_utf8(bytes) else {
498            continue;
499        };
500        if let Some(file) = SourceFile::detect(&path, contents) {
501            files.push(file);
502        }
503    }
504    Ok(files)
505}
506
507fn match_to_vm(path: &std::path::Path, m: &RuleMatch) -> VmValue {
508    let captures: harn_vm::value::DictMap = m
509        .bindings
510        .iter()
511        .map(|(name, b)| (name.clone(), str_vm(&b.text)))
512        .collect();
513    let capture_metadata = capture_metadata_vm(m);
514    dict_vm([
515        ("path", str_vm(path.display().to_string())),
516        ("text", str_vm(&m.text)),
517        ("start_row", VmValue::Int(m.span.start_row as i64)),
518        ("start_col", VmValue::Int(m.span.start_col as i64)),
519        ("end_row", VmValue::Int(m.span.end_row as i64)),
520        ("end_col", VmValue::Int(m.span.end_col as i64)),
521        ("captures", VmValue::dict(captures)),
522        ("capture_metadata", capture_metadata),
523    ])
524}
525
526fn backend(builtin: &'static str, err: &harn_rules::RulesError) -> HostlibError {
527    HostlibError::Backend {
528        builtin,
529        message: err.to_string(),
530    }
531}
532
533/// Lower a `HostlibError` into a `VmError` for the async `rules.visit` path
534/// (which must return `VmError`, not `HostlibError`).
535fn host_err(err: HostlibError) -> VmError {
536    VmError::Runtime(err.to_string())
537}
538
539/// One report a `.harn` visitor returned for a single match. Every field is
540/// optional: an empty report (e.g. the visitor returned `true`) flags the
541/// match using the rule's own defaults.
542#[derive(Default)]
543struct ReportSpec {
544    message: Option<String>,
545    fix: Option<String>,
546    safety: Option<Safety>,
547    severity: Option<Severity>,
548}
549
550/// The `node` value handed to a visitor: the matched text, its metavar
551/// captures, and its span.
552fn node_vm(m: &RuleMatch) -> VmValue {
553    let captures: harn_vm::value::DictMap = m
554        .bindings
555        .iter()
556        .map(|(name, b)| (name.clone(), str_vm(&b.text)))
557        .collect();
558    let capture_metadata = capture_metadata_vm(m);
559    dict_vm([
560        ("text", str_vm(&m.text)),
561        ("captures", VmValue::dict(captures)),
562        ("capture_metadata", capture_metadata),
563        ("start_row", VmValue::Int(m.span.start_row as i64)),
564        ("start_col", VmValue::Int(m.span.start_col as i64)),
565        ("end_row", VmValue::Int(m.span.end_row as i64)),
566        ("end_col", VmValue::Int(m.span.end_col as i64)),
567    ])
568}
569
570fn capture_metadata_vm(m: &RuleMatch) -> VmValue {
571    let metadata: harn_vm::value::DictMap = m
572        .bindings
573        .iter()
574        .filter(|(_, binding)| !binding.metadata.is_empty())
575        .map(|(name, binding)| (name.clone(), binding_metadata_vm(&binding.metadata)))
576        .collect();
577    VmValue::dict(metadata)
578}
579
580fn binding_metadata_vm(metadata: &BindingMetadata) -> VmValue {
581    let mut entries = BTreeMap::new();
582    if let Some(ty) = &metadata.ty {
583        entries.insert("type".into(), str_vm(ty));
584    }
585    if let Some(resolved) = &metadata.resolved {
586        entries.insert("resolved".into(), resolved_binding_vm(resolved));
587    }
588    VmValue::dict(entries)
589}
590
591fn resolved_binding_vm(resolved: &ResolvedBinding) -> VmValue {
592    dict_vm([
593        ("id", str_vm(&resolved.id)),
594        ("name", str_vm(&resolved.name)),
595        ("kind", str_vm(&resolved.kind)),
596        ("start_row", VmValue::Int(resolved.span.start_row as i64)),
597        ("start_col", VmValue::Int(resolved.span.start_col as i64)),
598        ("end_row", VmValue::Int(resolved.span.end_row as i64)),
599        ("end_col", VmValue::Int(resolved.span.end_col as i64)),
600    ])
601}
602
603/// The read-only `ctx` value handed to a visitor: where the match lives and
604/// what produced it.
605fn ctx_vm(path: &Path, language: Language, source: &str, rule_id: &str) -> VmValue {
606    dict_vm([
607        ("path", str_vm(path.display().to_string())),
608        ("language", str_vm(language.name())),
609        ("source", str_vm(source)),
610        ("rule_id", str_vm(rule_id)),
611    ])
612}
613
614/// Build a diagnostic dict — the one shape both `rules.diagnostics` and
615/// `rules.visit` emit, so an equivalent declarative and imperative rule
616/// produce identical output.
617fn diagnostic_dict(
618    path: &Path,
619    rule_id: &str,
620    message: &str,
621    severity: Severity,
622    span: Span,
623    fix: Option<String>,
624    applicability: Applicability,
625) -> VmValue {
626    dict_vm([
627        ("path", str_vm(path.display().to_string())),
628        ("rule_id", str_vm(rule_id)),
629        ("message", str_vm(message)),
630        ("severity", str_vm(severity.as_str())),
631        ("start_row", VmValue::Int(span.start_row as i64)),
632        ("start_col", VmValue::Int(span.start_col as i64)),
633        ("end_row", VmValue::Int(span.end_row as i64)),
634        ("end_col", VmValue::Int(span.end_col as i64)),
635        ("applicability", str_vm(applicability.as_str())),
636        ("fix", fix.map(str_vm).unwrap_or(VmValue::Nil)),
637    ])
638}
639
640fn diagnostic_vm(path: &Path, d: &Diagnostic) -> VmValue {
641    diagnostic_dict(
642        path,
643        &d.rule_id,
644        &d.message,
645        d.severity,
646        d.span,
647        d.fix.clone(),
648        d.applicability,
649    )
650}
651
652/// Turn a visitor's [`ReportSpec`] into the same diagnostic dict, located at
653/// the match's span and falling back to the rule's defaults.
654fn report_to_diagnostic_vm(
655    path: &Path,
656    rule_id: &str,
657    span: Span,
658    report: ReportSpec,
659    default_severity: Severity,
660    default_safety: Safety,
661) -> VmValue {
662    let severity = report.severity.unwrap_or(default_severity);
663    let safety = report.safety.unwrap_or(default_safety);
664    diagnostic_dict(
665        path,
666        rule_id,
667        report.message.as_deref().unwrap_or(""),
668        severity,
669        span,
670        report.fix,
671        safety.applicability(),
672    )
673}
674
675/// Interpret a visitor's return value: `nil`/`false` skips, `true` flags with
676/// rule defaults, a dict is one report, a list is many (skipping `nil`/`false`
677/// entries).
678fn reports_from_return(ret: VmValue) -> Vec<ReportSpec> {
679    match ret {
680        VmValue::Nil | VmValue::Bool(false) => Vec::new(),
681        VmValue::Bool(true) => vec![ReportSpec::default()],
682        VmValue::Dict(d) => vec![report_from_dict(&d)],
683        VmValue::List(items) => items.iter().filter_map(report_from_item).collect(),
684        _ => Vec::new(),
685    }
686}
687
688fn report_from_item(v: &VmValue) -> Option<ReportSpec> {
689    match v {
690        VmValue::Nil | VmValue::Bool(false) => None,
691        VmValue::Bool(true) => Some(ReportSpec::default()),
692        VmValue::Dict(d) => Some(report_from_dict(d)),
693        _ => None,
694    }
695}
696
697fn report_from_dict(d: &harn_vm::value::DictMap) -> ReportSpec {
698    ReportSpec {
699        message: optional_string(d, "message"),
700        fix: optional_string(d, "fix"),
701        safety: optional_string(d, "safety").and_then(|s| parse_safety(&s)),
702        severity: optional_string(d, "severity").and_then(|s| parse_severity(&s)),
703    }
704}
705
706fn parse_severity(s: &str) -> Option<Severity> {
707    match s {
708        "info" => Some(Severity::Info),
709        "warning" => Some(Severity::Warning),
710        "error" => Some(Severity::Error),
711        _ => None,
712    }
713}
714
715fn parse_safety(s: &str) -> Option<Safety> {
716    match s {
717        "format-only" => Some(Safety::FormatOnly),
718        "behavior-preserving" => Some(Safety::BehaviorPreserving),
719        "scope-local" => Some(Safety::ScopeLocal),
720        "surface-changing" => Some(Safety::SurfaceChanging),
721        "capability-changing" => Some(Safety::CapabilityChanging),
722        "needs-human" => Some(Safety::NeedsHuman),
723        _ => None,
724    }
725}
726
727fn json_to_vm(value: &serde_json::Value) -> VmValue {
728    match value {
729        serde_json::Value::Null => VmValue::Nil,
730        serde_json::Value::Bool(b) => VmValue::Bool(*b),
731        serde_json::Value::Number(n) => n
732            .as_i64()
733            .map(VmValue::Int)
734            .unwrap_or_else(|| VmValue::Float(n.as_f64().unwrap_or(0.0))),
735        serde_json::Value::String(s) => str_vm(s),
736        serde_json::Value::Array(items) => {
737            VmValue::List(Arc::new(items.iter().map(json_to_vm).collect()))
738        }
739        serde_json::Value::Object(map) => VmValue::dict(
740            map.iter()
741                .map(|(k, v)| (k.clone(), json_to_vm(v)))
742                .collect::<harn_vm::value::DictMap>(),
743        ),
744    }
745}
746
747// ---------------------------------------------------------------------------
748// Minimal arg/value helpers (harn-hostlib's `tools::args` is crate-private)
749// ---------------------------------------------------------------------------
750
751fn first_dict(
752    builtin: &'static str,
753    args: &[VmValue],
754) -> Result<Arc<harn_vm::value::DictMap>, HostlibError> {
755    match args.first() {
756        Some(VmValue::Dict(dict)) => Ok(dict.clone()),
757        Some(VmValue::Nil) | None => Ok(Arc::new(harn_vm::value::DictMap::new())),
758        Some(_) => Err(HostlibError::InvalidParameter {
759            builtin,
760            param: "params",
761            message: "expected a dict argument".into(),
762        }),
763    }
764}
765
766fn require_string(
767    builtin: &'static str,
768    dict: &harn_vm::value::DictMap,
769    key: &'static str,
770) -> Result<String, HostlibError> {
771    match dict.get(key) {
772        Some(VmValue::String(s)) => Ok(s.to_string()),
773        _ => Err(HostlibError::MissingParameter {
774            builtin,
775            param: key,
776        }),
777    }
778}
779
780fn optional_string(dict: &harn_vm::value::DictMap, key: &str) -> Option<String> {
781    match dict.get(key) {
782        Some(VmValue::String(s)) => Some(s.to_string()),
783        _ => None,
784    }
785}
786
787fn optional_string_list(dict: &harn_vm::value::DictMap, key: &str) -> Vec<String> {
788    match dict.get(key) {
789        Some(VmValue::List(items)) => items
790            .iter()
791            .filter_map(|v| match v {
792                VmValue::String(s) => Some(s.to_string()),
793                _ => None,
794            })
795            .collect(),
796        _ => Vec::new(),
797    }
798}
799
800fn optional_bool(dict: &harn_vm::value::DictMap, key: &str, default: bool) -> bool {
801    match dict.get(key) {
802        Some(VmValue::Bool(b)) => *b,
803        _ => default,
804    }
805}
806
807fn str_vm(s: impl AsRef<str>) -> VmValue {
808    VmValue::String(Arc::from(s.as_ref()))
809}
810
811fn dict_vm<const N: usize>(entries: [(&str, VmValue); N]) -> VmValue {
812    let map: harn_vm::value::DictMap = entries
813        .into_iter()
814        .map(|(k, v)| (k.to_string(), v))
815        .collect();
816    VmValue::dict(map)
817}
818
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `VmValue` dict from borrowed `(key, value)` pairs.
    fn dict(pairs: &[(&str, VmValue)]) -> VmValue {
        let map: harn_vm::value::DictMap = pairs
            .iter()
            .map(|(k, v)| (k.to_string(), v.clone()))
            .collect();
        VmValue::dict(map)
    }

    /// Looks up `key` in a dict value, panicking if `v` is not a dict or the
    /// key is absent.
    fn get<'a>(v: &'a VmValue, key: &str) -> &'a VmValue {
        match v {
            VmValue::Dict(d) => d.get(key).unwrap_or_else(|| panic!("missing {key}")),
            _ => panic!("not a dict"),
        }
    }

    /// Unwraps an integer value, panicking on any other variant.
    fn int(v: &VmValue) -> i64 {
        match v {
            VmValue::Int(i) => *i,
            other => panic!("not int: {other:?}"),
        }
    }

    /// Unwraps a string value, panicking on any other variant.
    fn s(v: &VmValue) -> String {
        match v {
            VmValue::String(s) => s.to_string(),
            other => panic!("not string: {other:?}"),
        }
    }

    /// Unwraps a boolean value, panicking on any other variant.
    fn b(v: &VmValue) -> bool {
        match v {
            VmValue::Bool(b) => *b,
            other => panic!("not bool: {other:?}"),
        }
    }

    /// Borrows the items of a list value, panicking (with the offending
    /// value) on any other variant.
    fn list(v: &VmValue) -> &[VmValue] {
        match v {
            VmValue::List(items) => items.as_slice(),
            other => panic!("not list: {other:?}"),
        }
    }

    /// Matches every zero-argument call in TypeScript, capturing the callee
    /// as `$FN`.
    const SEARCH_RULE: &str = r#"
        id = "find-calls"
        language = "typescript"
        [rule]
        pattern = "$FN()"
    "#;

    #[test]
    fn search_returns_matches_with_captures() {
        let result = search_run(&[dict(&[
            ("rule", str_vm(SEARCH_RULE)),
            ("source", str_vm("foo();\nbar();\n")),
            ("language", str_vm("typescript")),
        ])])
        .unwrap();
        assert_eq!(int(get(&result, "match_count")), 2);
        let matches = list(get(&result, "matches"));
        assert_eq!(s(get(get(&matches[0], "captures"), "FN")), "foo");
    }

    /// Despite the name, the thing that is not UTF-8 here is the *content*
    /// of the second file: it must be skipped while the readable file still
    /// produces its match.
    #[test]
    fn search_skips_non_utf8_paths() {
        let dir = tempfile::tempdir().unwrap();
        let source_path = dir.path().join("calls.ts");
        let binary_path = dir.path().join(".DS_Store");
        std::fs::write(&source_path, b"foo();\n").unwrap();
        std::fs::write(&binary_path, [0xff, 0xfe, 0xfd]).unwrap();

        let result = search_run(&[dict(&[
            ("rule", str_vm(SEARCH_RULE)),
            (
                "paths",
                VmValue::List(Arc::new(vec![
                    str_vm(source_path.display().to_string()),
                    str_vm(binary_path.display().to_string()),
                ])),
            ),
        ])])
        .unwrap();

        assert_eq!(int(get(&result, "match_count")), 1);
        let matches = list(get(&result, "matches"));
        assert_eq!(
            s(get(&matches[0], "path")),
            source_path.display().to_string()
        );
    }

    #[test]
    fn search_returns_harn_capture_metadata() {
        let rule = r#"
            id = "int-logs"
            language = "harn"
            [rule]
            pattern = "log($VALUE)"
        "#;
        let result = search_run(&[dict(&[
            ("rule", str_vm(rule)),
            (
                "source",
                str_vm("fn main() {\n  let count: int = 1\n  log(count)\n}\n"),
            ),
            ("language", str_vm("harn")),
        ])])
        .unwrap();
        let matches = list(get(&result, "matches"));
        let metadata = get(get(&matches[0], "capture_metadata"), "VALUE");
        assert_eq!(s(get(metadata, "type")), "int");
        assert_eq!(s(get(get(metadata, "resolved"), "name")), "count");
        assert_eq!(s(get(get(metadata, "resolved"), "kind")), "let");
    }

    #[test]
    fn report_returns_a_data_table() {
        let result = report_run(&[dict(&[
            ("rule", str_vm(SEARCH_RULE)),
            ("source", str_vm("foo();\nbar();\n")),
            ("language", str_vm("typescript")),
            ("path", str_vm("a.ts")),
        ])])
        .unwrap();
        assert_eq!(int(get(get(&result, "summary"), "total_rows")), 2);
        assert_eq!(s(get(&result, "rule_id")), "find-calls");
    }

    #[test]
    fn apply_dry_run_previews_without_writing() {
        let rule = r#"
            id = "rename"
            language = "typescript"
            safety = "behavior-preserving"
            fix = "bar()"
            [rule]
            pattern = "foo()"
        "#;
        let result = apply_run(&[dict(&[
            ("rule", str_vm(rule)),
            ("source", str_vm("foo();\n")),
            ("language", str_vm("typescript")),
            ("dry_run", VmValue::Bool(true)),
        ])])
        .unwrap();
        let files = list(get(&result, "files"));
        assert!(b(get(&files[0], "changed")));
        assert!(!b(get(&files[0], "applied")));
        assert_eq!(s(get(&files[0], "preview")), "bar();\n");
    }

    /// A Harn codemod whose `fix` template is intentionally unformatted, used
    /// to observe whether the formatting post-pass ran.
    const UGLY_HARN_CODEMOD: &str = r#"
        id = "dd"
        language = "harn"
        safety = "scope-local"
        fix = "let {$K=$D}=$X"
        [rule]
        pattern = "let $K = $X?.$K ?? $D"
    "#;

    #[test]
    fn apply_formats_harn_output_by_default() {
        // The fix template is deliberately ugly; the #2847 fmt post-pass
        // normalizes the rewritten `.harn` (so a batch lands fmt-stable).
        let result = apply_run(&[dict(&[
            ("rule", str_vm(UGLY_HARN_CODEMOD)),
            (
                "source",
                str_vm("fn main() {\n  let timeout = cfg?.timeout ?? 30\n}\n"),
            ),
            ("language", str_vm("harn")),
            ("dry_run", VmValue::Bool(true)),
        ])])
        .unwrap();
        let files = list(get(&result, "files"));
        assert!(b(get(&files[0], "changed")));
        assert!(b(get(&files[0], "formatted")));
        let preview = s(get(&files[0], "preview"));
        assert!(preview.contains("= 30"), "preview not formatted: {preview}");
    }

    #[test]
    fn apply_format_false_leaves_raw_output() {
        let result = apply_run(&[dict(&[
            ("rule", str_vm(UGLY_HARN_CODEMOD)),
            (
                "source",
                str_vm("fn main() {\n  let timeout = cfg?.timeout ?? 30\n}\n"),
            ),
            ("language", str_vm("harn")),
            ("dry_run", VmValue::Bool(true)),
            ("format", VmValue::Bool(false)),
        ])])
        .unwrap();
        let files = list(get(&result, "files"));
        assert!(!b(get(&files[0], "formatted")));
        let preview = s(get(&files[0], "preview"));
        assert!(preview.contains("{timeout=30}"), "expected raw: {preview}");
    }

    #[test]
    fn diagnostics_returns_lint_findings() {
        let lint = r#"
            id = "calls"
            language = "typescript"
            message = "function call"
            [rule]
            pattern = "$FN()"
        "#;
        let result = diagnostics_run(&[dict(&[
            ("rule", str_vm(lint)),
            ("source", str_vm("foo();\nbar();\n")),
            ("language", str_vm("typescript")),
            ("path", str_vm("a.ts")),
        ])])
        .unwrap();
        assert_eq!(int(get(&result, "diagnostic_count")), 2);
        let diags = list(get(&result, "diagnostics"));
        assert_eq!(s(get(&diags[0], "message")), "function call");
        assert_eq!(s(get(&diags[0], "severity")), "warning");
        // No `fix` and default safety → a suggestion, not machine-applicable.
        assert_eq!(s(get(&diags[0], "applicability")), "suggestion");
        assert_eq!(int(get(&diags[1], "start_row")), 1);
        assert!(matches!(get(&diags[0], "fix"), VmValue::Nil));
    }

    #[test]
    fn report_helpers_round_trip_severity_and_safety() {
        // The string<->enum mapping used by `rules.visit` reports.
        assert_eq!(parse_severity("error"), Some(Severity::Error));
        assert_eq!(parse_severity("bogus"), None);
        assert_eq!(parse_safety("format-only"), Some(Safety::FormatOnly));
        assert_eq!(parse_safety("needs-human"), Some(Safety::NeedsHuman));
        assert_eq!(parse_safety("nope"), None);
        // `true` flags with defaults; nil/false skip; a dict carries fields.
        assert_eq!(reports_from_return(VmValue::Bool(true)).len(), 1);
        assert_eq!(reports_from_return(VmValue::Nil).len(), 0);
        assert_eq!(reports_from_return(VmValue::Bool(false)).len(), 0);
        let list = VmValue::List(Arc::new(vec![
            dict(&[("message", str_vm("a"))]),
            VmValue::Nil,
            dict(&[("message", str_vm("b"))]),
        ]));
        assert_eq!(reports_from_return(list).len(), 2);
    }

    #[test]
    fn capability_does_not_register_the_async_visitor() {
        // `rules.visit` is async, so it is installed directly on the VM in
        // `install`, not through the sync capability registry.
        let mut registry = BuiltinRegistry::new();
        RulesCapability.register_builtins(&mut registry);
        let names: Vec<_> = registry.iter().map(|b| b.name).collect();
        assert!(!names.contains(&VISIT));
        assert!(names.contains(&DIAGNOSTICS));
    }

    #[test]
    fn missing_rule_is_an_error() {
        let err = search_run(&[dict(&[
            ("source", str_vm("x")),
            ("language", str_vm("rust")),
        ])]);
        assert!(matches!(
            err,
            Err(HostlibError::MissingParameter { param: "rule", .. })
        ));
    }

    #[test]
    fn capability_registers_the_sync_builtins() {
        let mut registry = BuiltinRegistry::new();
        RulesCapability.register_builtins(&mut registry);
        let names: Vec<_> = registry.iter().map(|b| b.name).collect();
        assert_eq!(names, vec![SEARCH, REPORT, DIAGNOSTICS, APPLY, FOLD]);
    }

    #[test]
    fn lint_capability_registers_run() {
        let mut registry = BuiltinRegistry::new();
        LintCapability.register_builtins(&mut registry);
        let names: Vec<_> = registry.iter().map(|b| b.name).collect();
        assert_eq!(names, vec![LINT_RUN]);
    }

    #[test]
    fn lint_run_returns_the_linter_findings() {
        let result =
            lint_run(&[dict(&[("source", str_vm("fn f() {\n  let x = (1)\n}\n"))])]).unwrap();
        assert_eq!(s(get(&result, "result")), "ok");
        let diags = list(get(&result, "diagnostics"));
        assert!(
            diags
                .iter()
                .any(|d| s(get(d, "rule")) == "unnecessary-parentheses"),
            "expected unnecessary-parentheses, got {diags:?}"
        );
    }

    #[test]
    fn lint_run_applies_a_severity_override() {
        let result = lint_run(&[dict(&[
            ("source", str_vm("fn f() {\n  let x = (1)\n}\n")),
            (
                "severity",
                dict(&[("unnecessary-parentheses", str_vm("error"))]),
            ),
        ])])
        .unwrap();
        let diags = list(get(&result, "diagnostics"));
        let d = diags
            .iter()
            .find(|d| s(get(d, "rule")) == "unnecessary-parentheses")
            .expect("rule present");
        assert_eq!(s(get(d, "severity")), "error");
    }
}