Skip to main content

harn_rules_hostlib/
lib.rs

//! Host capability exposing the `harn-rules` declarative rule engine to
//! Harn as the `rules.*` builtins (`search`, `report`, `diagnostics`,
//! `visit`, `apply`, `fold`), plus the `lint.run` linter entry point.
3//!
4//! This crate lives outside `harn-hostlib` on purpose: `harn-rules` already
5//! depends on `harn-hostlib` (for the tree-sitter grammars), so the rules
6//! builtins would form a dependency cycle if they lived there. An embedder
7//! (harn-cli, harn-serve) calls [`install`] alongside `harn_hostlib::install_default`.
8//!
9//! ## Builtins
10//!
11//! - `rules.search` (read-only) — run a rule and return its matches.
12//! - `rules.report` (read-only) — run a rule in report-only mode and return
13//!   a [`harn_rules::DataTable`] (counts + per-match rows).
14//! - `rules.diagnostics` (read-only) — run a **declarative** rule and return
15//!   its [`harn_rules::Diagnostic`]s (message + severity + span + fix).
16//! - `rules.visit` (read-only, **async**) — the **imperative** escape hatch:
17//!   run a rule's matcher, then invoke a `.harn` visitor
18//!   `on_match($node, $ctx)` once per match. The visitor returns its
19//!   report(s) — `nil`/`false` to skip, a `{message, fix, safety}` dict, or
20//!   a list of them — which the engine turns into diagnostics of the same
21//!   shape `rules.diagnostics` emits. The visitor has full programmatic
22//!   control (compute a message/fix from the captured metavars), which the
23//!   declarative form cannot.
24//! - `rules.apply` (write-gated) — apply a codemod rule's `fix`; writes only
25//!   when `dry_run: false` *and* the rule is safe to auto-apply (or
26//!   `allow_unsafe: true`). Shares the deterministic-tools gate with the
27//!   other mutating builtins.
28//!
29//! A rule is passed as its TOML source (`rule`), so an agent can author and
30//! run a rule — declarative *or* imperative — entirely from `.harn` without
31//! recompiling the binary.
32//!
33//! ### Why `rules.visit` is async, and why it returns rather than mutates
34//!
35//! A *synchronous* hostlib builtin cannot call a `.harn` closure: the VM's
36//! [`Vm::call_closure_pub`] is async-only. So the visitor is registered as an
37//! **async** builtin (directly on the VM via [`Vm::register_async_builtin`],
38//! bypassing the sync [`HostlibRegistry`]), which can obtain a child VM from
39//! its [`AsyncBuiltinCtx`] and call back per match.
40//!
41//! The visitor **returns** its reports instead of calling a mutating
42//! `ctx.report(...)`: Harn closures capture by value, so a Harn-side
43//! accumulator could not collect across calls, and `VmValue` has no callable
44//! variant that carries captured Rust state to embed a stateful `report`
45//! method in `ctx`. Returning is both the sound option and the simpler one.
46
47use std::collections::BTreeMap;
48use std::path::{Path, PathBuf};
49use std::sync::Arc;
50
51use harn_hostlib::ast::Language;
52use harn_hostlib::tools::permissions::gated_handler;
53use harn_hostlib::{
54    BuiltinRegistry, HostlibCapability, HostlibError, HostlibRegistry, RegisteredBuiltin,
55};
56use harn_vm::{AsyncBuiltinCtx, Vm, VmError, VmValue};
57
58use harn_rules::{
59    data_table, Applicability, BindingMetadata, CompiledRule, Diagnostic, ResolvedBinding, Rule,
60    RuleMatch, Safety, Severity, SourceFile, Span,
61};
62
// Registered builtin names. Each one is used both as the `name` under which
// the builtin is registered and as the `builtin` label attached to errors
// raised by that builtin's handler.
const SEARCH: &str = "hostlib_rules_search";
const REPORT: &str = "hostlib_rules_report";
const DIAGNOSTICS: &str = "hostlib_rules_diagnostics";
// Registered as an async builtin directly on the VM (see `install`).
const VISIT: &str = "hostlib_rules_visit";
// `APPLY` and `FOLD` write files and are wrapped in `gated_handler`.
const APPLY: &str = "hostlib_rules_apply";
const FOLD: &str = "hostlib_rules_fold";
const LINT_RUN: &str = "hostlib_lint_run";
70
71/// The `rules` host capability.
72#[derive(Default)]
73pub struct RulesCapability;
74
75impl HostlibCapability for RulesCapability {
76    fn module_name(&self) -> &'static str {
77        "rules"
78    }
79
80    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
81        registry.register(RegisteredBuiltin {
82            name: SEARCH,
83            module: "rules",
84            method: "search",
85            handler: Arc::new(search_run),
86        });
87        registry.register(RegisteredBuiltin {
88            name: REPORT,
89            module: "rules",
90            method: "report",
91            handler: Arc::new(report_run),
92        });
93        registry.register(RegisteredBuiltin {
94            name: DIAGNOSTICS,
95            module: "rules",
96            method: "diagnostics",
97            handler: Arc::new(diagnostics_run),
98        });
99        // `apply` writes files, so it shares the deterministic-tools gate.
100        registry.register(RegisteredBuiltin {
101            name: APPLY,
102            module: "rules",
103            method: "apply",
104            handler: gated_handler(APPLY, apply_run),
105        });
106        // `fold` also writes; same gate.
107        registry.register(RegisteredBuiltin {
108            name: FOLD,
109            module: "rules",
110            method: "fold",
111            handler: gated_handler(FOLD, fold_run),
112        });
113    }
114}
115
116/// The `lint` host capability (#2851): runs the Harn linter for an
117/// agent/IDE/cloud caller, returning the same diagnostics the CLI emits.
118#[derive(Default)]
119pub struct LintCapability;
120
121impl HostlibCapability for LintCapability {
122    fn module_name(&self) -> &'static str {
123        "lint"
124    }
125
126    fn register_builtins(&self, registry: &mut BuiltinRegistry) {
127        // Read-only: lint.run parses + lints in memory, never writes.
128        registry.register(RegisteredBuiltin {
129            name: LINT_RUN,
130            module: "lint",
131            method: "run",
132            handler: Arc::new(lint_run),
133        });
134    }
135}
136
137/// Install the `rules` + `lint` capabilities into a VM. Call this alongside
138/// `harn_hostlib::install_default`.
139pub fn install(vm: &mut Vm) {
140    HostlibRegistry::new()
141        .with(RulesCapability)
142        .with(LintCapability)
143        .register_into_vm(vm);
144    // `rules.visit` invokes a `.harn` closure per match, which only an async
145    // builtin can do (`call_closure_pub` is async). It is therefore registered
146    // directly on the VM rather than through the sync `HostlibRegistry`.
147    vm.register_async_builtin(VISIT, visit_run);
148}
149
150// ---------------------------------------------------------------------------
151// Builtins
152// ---------------------------------------------------------------------------
153
154fn search_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
155    let dict = first_dict(SEARCH, args)?;
156    let rule = compile_rule(SEARCH, &dict)?;
157    let files = load_files(SEARCH, &dict)?;
158
159    let mut matches = Vec::new();
160    for file in &files {
161        for m in rule.run(&file.source).map_err(|e| backend(SEARCH, &e))? {
162            matches.push(match_to_vm(&file.path, &m));
163        }
164    }
165    Ok(dict_vm([
166        ("result", str_vm("ok")),
167        ("match_count", VmValue::Int(matches.len() as i64)),
168        ("matches", VmValue::List(Arc::new(matches))),
169    ]))
170}
171
172fn report_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
173    let dict = first_dict(REPORT, args)?;
174    let rule = compile_rule(REPORT, &dict)?;
175    let files = load_files(REPORT, &dict)?;
176    let table = data_table(&rule, &files).map_err(|e| backend(REPORT, &e))?;
177    Ok(json_to_vm(&table.to_json_value()))
178}
179
180fn diagnostics_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
181    let dict = first_dict(DIAGNOSTICS, args)?;
182    let rule = compile_rule(DIAGNOSTICS, &dict)?;
183    let files = load_files(DIAGNOSTICS, &dict)?;
184
185    let mut diagnostics = Vec::new();
186    for file in &files {
187        for d in rule
188            .diagnostics(&file.source)
189            .map_err(|e| backend(DIAGNOSTICS, &e))?
190        {
191            diagnostics.push(diagnostic_vm(&file.path, &d));
192        }
193    }
194    Ok(dict_vm([
195        ("result", str_vm("ok")),
196        ("diagnostic_count", VmValue::Int(diagnostics.len() as i64)),
197        ("diagnostics", VmValue::List(Arc::new(diagnostics))),
198    ]))
199}
200
201/// The imperative escape hatch (#2878): run the rule's matcher, then call the
202/// `.harn` visitor `on_match($node, $ctx)` once per match. The visitor's
203/// return value becomes diagnostics of the same shape `rules.diagnostics`
204/// emits. Read-only — it never writes; the agent applies fixes itself.
205async fn visit_run(ctx: AsyncBuiltinCtx, args: Vec<VmValue>) -> Result<VmValue, VmError> {
206    let dict = first_dict(VISIT, &args).map_err(host_err)?;
207    let rule = compile_rule(VISIT, &dict).map_err(host_err)?;
208    let files = load_files(VISIT, &dict).map_err(host_err)?;
209    let visitor = match dict.get("on_match") {
210        Some(VmValue::Closure(c)) => c.clone(),
211        _ => {
212            return Err(VmError::Runtime(format!(
213                "{VISIT}: `on_match` must be a function `fn(node, ctx)`"
214            )))
215        }
216    };
217
218    let default_severity = rule.severity();
219    let default_safety = rule.safety();
220    let rule_id = rule.id().to_string();
221
222    let mut vm = ctx.child_vm();
223    let mut diagnostics = Vec::new();
224    for file in &files {
225        let matches = rule
226            .run(&file.source)
227            .map_err(|e| host_err(backend(VISIT, &e)))?;
228        let file_ctx = ctx_vm(&file.path, file.language, &file.source, &rule_id);
229        for m in &matches {
230            let node = node_vm(m);
231            let ret = vm
232                .call_closure_pub(&visitor, &[node, file_ctx.clone()])
233                .await?;
234            ctx.forward_output(&vm.take_output());
235            for report in reports_from_return(ret) {
236                diagnostics.push(report_to_diagnostic_vm(
237                    &file.path,
238                    &rule_id,
239                    m.span,
240                    report,
241                    default_severity,
242                    default_safety,
243                ));
244            }
245        }
246    }
247    Ok(dict_vm([
248        ("result", str_vm("ok")),
249        ("diagnostic_count", VmValue::Int(diagnostics.len() as i64)),
250        ("diagnostics", VmValue::List(Arc::new(diagnostics))),
251    ]))
252}
253
254fn apply_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
255    let dict = first_dict(APPLY, args)?;
256    let rule = compile_rule(APPLY, &dict)?;
257    let dry_run = optional_bool(&dict, "dry_run", true);
258    let allow_unsafe = optional_bool(&dict, "allow_unsafe", false);
259    // fmt post-pass (#2847): normalize rewritten `.harn` so a batch lands
260    // fmt-stable. On by default; `format: false` opts out.
261    let format = optional_bool(&dict, "format", true);
262    let files = load_files(APPLY, &dict)?;
263
264    let auto_applicable = rule.safety().is_auto_applicable();
265    let mut entries = Vec::new();
266    for file in &files {
267        let outcome = rule.apply(&file.source).map_err(|e| backend(APPLY, &e))?;
268        // Only `.harn` has a formatter; harn_fmt is idempotent, so a later
269        // `harn fmt` is a no-op. A formatter error falls back to the raw
270        // rewrite rather than failing the codemod.
271        let formatted = format && outcome.changed && file.language == Language::Harn;
272        let rewritten = if formatted {
273            match harn_fmt::format_source(&outcome.rewritten) {
274                Ok(canonical) => canonical,
275                Err(_) => outcome.rewritten,
276            }
277        } else {
278            outcome.rewritten
279        };
280        // Write only on a real apply, when the edit is safe to auto-apply
281        // (or explicitly allowed), and the rule actually changed the file.
282        let applied = !dry_run && outcome.changed && (auto_applicable || allow_unsafe);
283        if applied {
284            std::fs::write(&file.path, &rewritten).map_err(|e| HostlibError::Backend {
285                builtin: APPLY,
286                message: format!("write `{}`: {e}", file.path.display()),
287            })?;
288        }
289        entries.push(dict_vm([
290            ("path", str_vm(file.path.display().to_string())),
291            ("changed", VmValue::Bool(outcome.changed)),
292            ("applied", VmValue::Bool(applied)),
293            ("idempotent", VmValue::Bool(outcome.idempotent)),
294            ("formatted", VmValue::Bool(formatted)),
295            ("safety", str_vm(format!("{:?}", outcome.safety))),
296            // The original source, so callers can render a diff without a
297            // (sandboxed) re-read of the file.
298            ("before", str_vm(&file.source)),
299            ("preview", str_vm(rewritten)),
300        ]));
301    }
302    Ok(dict_vm([
303        ("result", str_vm("ok")),
304        ("dry_run", VmValue::Bool(dry_run)),
305        ("auto_applicable", VmValue::Bool(auto_applicable)),
306        ("files", VmValue::List(Arc::new(entries))),
307    ]))
308}
309
310/// `rules.fold` (#2824): fold consecutive `let x = src?.x ?? d` runs into a
311/// single destructure-with-defaults. A specialized, behavior-preserving
312/// codemod (the engine can't fold statement sequences declaratively). Writes
313/// only on a real apply (`dry_run: false`); shares the deterministic gate.
314fn fold_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
315    let dict = first_dict(FOLD, args)?;
316    let dry_run = optional_bool(&dict, "dry_run", true);
317    let files = load_files(FOLD, &dict)?;
318
319    let mut entries = Vec::new();
320    for file in &files {
321        let folded =
322            harn_rules::fold::fold_destructure_defaults(&file.source, file.language.name())
323                .map_err(|e| backend(FOLD, &e))?;
324        let changed = folded != file.source;
325        let applied = !dry_run && changed;
326        if applied {
327            std::fs::write(&file.path, &folded).map_err(|e| HostlibError::Backend {
328                builtin: FOLD,
329                message: format!("write `{}`: {e}", file.path.display()),
330            })?;
331        }
332        entries.push(dict_vm([
333            ("path", str_vm(file.path.display().to_string())),
334            ("changed", VmValue::Bool(changed)),
335            ("applied", VmValue::Bool(applied)),
336            ("before", str_vm(&file.source)),
337            ("preview", str_vm(folded)),
338        ]));
339    }
340    Ok(dict_vm([
341        ("result", str_vm("ok")),
342        ("dry_run", VmValue::Bool(dry_run)),
343        ("files", VmValue::List(Arc::new(entries))),
344    ]))
345}
346
347/// `lint.run` (#2851): lint a Harn source string and return its diagnostics, so
348/// an agent / IDE / cloud caller gets the same findings as `harn lint` without
349/// shelling out. Read-only. Params: `{source, disabled?, severity?}` where
350/// `severity` maps a rule id to `"error"` / `"warning"` / `"info"`.
351fn lint_run(args: &[VmValue]) -> Result<VmValue, HostlibError> {
352    let dict = first_dict(LINT_RUN, args)?;
353    let source = require_string(LINT_RUN, &dict, "source")?;
354    let disabled = optional_string_list(&dict, "disabled");
355    let severity_overrides = parse_severity_overrides(&dict);
356
357    let program = harn_parser::parse_source(&source).map_err(|e| HostlibError::Backend {
358        builtin: LINT_RUN,
359        message: format!("parse error: {e}"),
360    })?;
361    let options = harn_lint::LintOptions {
362        severity_overrides,
363        ..Default::default()
364    };
365    let diagnostics = harn_lint::lint_with_options(
366        &program,
367        &disabled,
368        Some(&source),
369        &std::collections::HashSet::new(),
370        &options,
371    );
372    let items: Vec<VmValue> = diagnostics.iter().map(lint_diagnostic_vm).collect();
373    Ok(dict_vm([
374        ("result", str_vm("ok")),
375        ("diagnostic_count", VmValue::Int(items.len() as i64)),
376        ("diagnostics", VmValue::List(Arc::new(items))),
377    ]))
378}
379
380/// Parse a `severity` dict param (`{rule: "error"|"warning"|"info"}`) into the
381/// linter's override map. Unknown severities are skipped.
382fn parse_severity_overrides(
383    dict: &BTreeMap<String, VmValue>,
384) -> std::collections::HashMap<String, harn_lint::LintSeverity> {
385    let mut out = std::collections::HashMap::new();
386    if let Some(VmValue::Dict(map)) = dict.get("severity") {
387        for (rule, value) in map.iter() {
388            if let VmValue::String(s) = value {
389                let severity = match s.to_ascii_lowercase().as_str() {
390                    "error" => Some(harn_lint::LintSeverity::Error),
391                    "warning" | "warn" => Some(harn_lint::LintSeverity::Warning),
392                    "info" => Some(harn_lint::LintSeverity::Info),
393                    _ => None,
394                };
395                if let Some(severity) = severity {
396                    out.insert(rule.clone(), severity);
397                }
398            }
399        }
400    }
401    out
402}
403
404/// Marshal a [`harn_lint::LintDiagnostic`] into a VM dict, mirroring the
405/// fields the CLI renders (code, rule, message, severity, span).
406fn lint_diagnostic_vm(diag: &harn_lint::LintDiagnostic) -> VmValue {
407    let severity = match diag.severity {
408        harn_lint::LintSeverity::Error => "error",
409        harn_lint::LintSeverity::Warning => "warning",
410        harn_lint::LintSeverity::Info => "info",
411    };
412    dict_vm([
413        ("code", str_vm(diag.code.as_str())),
414        ("rule", str_vm(diag.rule.as_ref())),
415        ("message", str_vm(&diag.message)),
416        ("severity", str_vm(severity)),
417        ("start_byte", VmValue::Int(diag.span.start as i64)),
418        ("end_byte", VmValue::Int(diag.span.end as i64)),
419        ("line", VmValue::Int(diag.span.line as i64)),
420        ("column", VmValue::Int(diag.span.column as i64)),
421    ])
422}
423
424// ---------------------------------------------------------------------------
425// Shared parsing / conversion
426// ---------------------------------------------------------------------------
427
428fn compile_rule(
429    builtin: &'static str,
430    dict: &BTreeMap<String, VmValue>,
431) -> Result<CompiledRule, HostlibError> {
432    let toml = require_string(builtin, dict, "rule")?;
433    let rule = Rule::from_toml_str(&toml).map_err(|e| HostlibError::InvalidParameter {
434        builtin,
435        param: "rule",
436        message: format!("invalid rule TOML: {e}"),
437    })?;
438    CompiledRule::compile(&rule).map_err(|e| HostlibError::InvalidParameter {
439        builtin,
440        param: "rule",
441        message: e.to_string(),
442    })
443}
444
445/// Load the fileset: inline `source` (+ `language`) for a single buffer, or
446/// `paths` read from disk (language inferred per file; undetectable files
447/// are skipped).
448fn load_files(
449    builtin: &'static str,
450    dict: &BTreeMap<String, VmValue>,
451) -> Result<Vec<SourceFile>, HostlibError> {
452    if let Some(source) = optional_string(dict, "source") {
453        let language_name = require_string(builtin, dict, "language")?;
454        let language =
455            Language::from_name(&language_name).ok_or_else(|| HostlibError::InvalidParameter {
456                builtin,
457                param: "language",
458                message: format!("unknown language `{language_name}`"),
459            })?;
460        let path = optional_string(dict, "path").unwrap_or_else(|| "<inline>".to_string());
461        return Ok(vec![SourceFile {
462            path: PathBuf::from(path),
463            language,
464            source,
465        }]);
466    }
467
468    let paths = optional_string_list(dict, "paths");
469    if paths.is_empty() {
470        return Err(HostlibError::MissingParameter {
471            builtin,
472            param: "paths",
473        });
474    }
475    let mut files = Vec::new();
476    for path in paths {
477        let contents = std::fs::read_to_string(&path).map_err(|e| HostlibError::Backend {
478            builtin,
479            message: format!("read `{path}`: {e}"),
480        })?;
481        if let Some(file) = SourceFile::detect(&path, contents) {
482            files.push(file);
483        }
484    }
485    Ok(files)
486}
487
488fn match_to_vm(path: &std::path::Path, m: &RuleMatch) -> VmValue {
489    let captures: BTreeMap<String, VmValue> = m
490        .bindings
491        .iter()
492        .map(|(name, b)| (name.clone(), str_vm(&b.text)))
493        .collect();
494    let capture_metadata = capture_metadata_vm(m);
495    dict_vm([
496        ("path", str_vm(path.display().to_string())),
497        ("text", str_vm(&m.text)),
498        ("start_row", VmValue::Int(m.span.start_row as i64)),
499        ("start_col", VmValue::Int(m.span.start_col as i64)),
500        ("end_row", VmValue::Int(m.span.end_row as i64)),
501        ("end_col", VmValue::Int(m.span.end_col as i64)),
502        ("captures", VmValue::Dict(Arc::new(captures))),
503        ("capture_metadata", capture_metadata),
504    ])
505}
506
507fn backend(builtin: &'static str, err: &harn_rules::RulesError) -> HostlibError {
508    HostlibError::Backend {
509        builtin,
510        message: err.to_string(),
511    }
512}
513
514/// Lower a `HostlibError` into a `VmError` for the async `rules.visit` path
515/// (which must return `VmError`, not `HostlibError`).
516fn host_err(err: HostlibError) -> VmError {
517    VmError::Runtime(err.to_string())
518}
519
/// One report a `.harn` visitor returned for a single match. Every field is
/// optional: an empty report (e.g. the visitor returned `true`) flags the
/// match using the rule's own defaults.
#[derive(Default)]
struct ReportSpec {
    // Diagnostic message; rendered as an empty string when absent.
    message: Option<String>,
    // Replacement text; emitted as `nil` in the diagnostic when absent.
    fix: Option<String>,
    // Overrides the rule's safety when present and recognized.
    safety: Option<Safety>,
    // Overrides the rule's severity when present and recognized.
    severity: Option<Severity>,
}
530
531/// The `node` value handed to a visitor: the matched text, its metavar
532/// captures, and its span.
533fn node_vm(m: &RuleMatch) -> VmValue {
534    let captures: BTreeMap<String, VmValue> = m
535        .bindings
536        .iter()
537        .map(|(name, b)| (name.clone(), str_vm(&b.text)))
538        .collect();
539    let capture_metadata = capture_metadata_vm(m);
540    dict_vm([
541        ("text", str_vm(&m.text)),
542        ("captures", VmValue::Dict(Arc::new(captures))),
543        ("capture_metadata", capture_metadata),
544        ("start_row", VmValue::Int(m.span.start_row as i64)),
545        ("start_col", VmValue::Int(m.span.start_col as i64)),
546        ("end_row", VmValue::Int(m.span.end_row as i64)),
547        ("end_col", VmValue::Int(m.span.end_col as i64)),
548    ])
549}
550
551fn capture_metadata_vm(m: &RuleMatch) -> VmValue {
552    let metadata: BTreeMap<String, VmValue> = m
553        .bindings
554        .iter()
555        .filter(|(_, binding)| !binding.metadata.is_empty())
556        .map(|(name, binding)| (name.clone(), binding_metadata_vm(&binding.metadata)))
557        .collect();
558    VmValue::Dict(Arc::new(metadata))
559}
560
561fn binding_metadata_vm(metadata: &BindingMetadata) -> VmValue {
562    let mut entries = BTreeMap::new();
563    if let Some(ty) = &metadata.ty {
564        entries.insert("type".into(), str_vm(ty));
565    }
566    if let Some(resolved) = &metadata.resolved {
567        entries.insert("resolved".into(), resolved_binding_vm(resolved));
568    }
569    VmValue::Dict(Arc::new(entries))
570}
571
572fn resolved_binding_vm(resolved: &ResolvedBinding) -> VmValue {
573    dict_vm([
574        ("id", str_vm(&resolved.id)),
575        ("name", str_vm(&resolved.name)),
576        ("kind", str_vm(&resolved.kind)),
577        ("start_row", VmValue::Int(resolved.span.start_row as i64)),
578        ("start_col", VmValue::Int(resolved.span.start_col as i64)),
579        ("end_row", VmValue::Int(resolved.span.end_row as i64)),
580        ("end_col", VmValue::Int(resolved.span.end_col as i64)),
581    ])
582}
583
584/// The read-only `ctx` value handed to a visitor: where the match lives and
585/// what produced it.
586fn ctx_vm(path: &Path, language: Language, source: &str, rule_id: &str) -> VmValue {
587    dict_vm([
588        ("path", str_vm(path.display().to_string())),
589        ("language", str_vm(language.name())),
590        ("source", str_vm(source)),
591        ("rule_id", str_vm(rule_id)),
592    ])
593}
594
595/// Build a diagnostic dict — the one shape both `rules.diagnostics` and
596/// `rules.visit` emit, so an equivalent declarative and imperative rule
597/// produce identical output.
598fn diagnostic_dict(
599    path: &Path,
600    rule_id: &str,
601    message: &str,
602    severity: Severity,
603    span: Span,
604    fix: Option<String>,
605    applicability: Applicability,
606) -> VmValue {
607    dict_vm([
608        ("path", str_vm(path.display().to_string())),
609        ("rule_id", str_vm(rule_id)),
610        ("message", str_vm(message)),
611        ("severity", str_vm(severity.as_str())),
612        ("start_row", VmValue::Int(span.start_row as i64)),
613        ("start_col", VmValue::Int(span.start_col as i64)),
614        ("end_row", VmValue::Int(span.end_row as i64)),
615        ("end_col", VmValue::Int(span.end_col as i64)),
616        ("applicability", str_vm(applicability.as_str())),
617        ("fix", fix.map(str_vm).unwrap_or(VmValue::Nil)),
618    ])
619}
620
621fn diagnostic_vm(path: &Path, d: &Diagnostic) -> VmValue {
622    diagnostic_dict(
623        path,
624        &d.rule_id,
625        &d.message,
626        d.severity,
627        d.span,
628        d.fix.clone(),
629        d.applicability,
630    )
631}
632
633/// Turn a visitor's [`ReportSpec`] into the same diagnostic dict, located at
634/// the match's span and falling back to the rule's defaults.
635fn report_to_diagnostic_vm(
636    path: &Path,
637    rule_id: &str,
638    span: Span,
639    report: ReportSpec,
640    default_severity: Severity,
641    default_safety: Safety,
642) -> VmValue {
643    let severity = report.severity.unwrap_or(default_severity);
644    let safety = report.safety.unwrap_or(default_safety);
645    diagnostic_dict(
646        path,
647        rule_id,
648        report.message.as_deref().unwrap_or(""),
649        severity,
650        span,
651        report.fix,
652        safety.applicability(),
653    )
654}
655
656/// Interpret a visitor's return value: `nil`/`false` skips, `true` flags with
657/// rule defaults, a dict is one report, a list is many (skipping `nil`/`false`
658/// entries).
659fn reports_from_return(ret: VmValue) -> Vec<ReportSpec> {
660    match ret {
661        VmValue::Nil | VmValue::Bool(false) => Vec::new(),
662        VmValue::Bool(true) => vec![ReportSpec::default()],
663        VmValue::Dict(d) => vec![report_from_dict(&d)],
664        VmValue::List(items) => items.iter().filter_map(report_from_item).collect(),
665        _ => Vec::new(),
666    }
667}
668
669fn report_from_item(v: &VmValue) -> Option<ReportSpec> {
670    match v {
671        VmValue::Nil | VmValue::Bool(false) => None,
672        VmValue::Bool(true) => Some(ReportSpec::default()),
673        VmValue::Dict(d) => Some(report_from_dict(d)),
674        _ => None,
675    }
676}
677
678fn report_from_dict(d: &BTreeMap<String, VmValue>) -> ReportSpec {
679    ReportSpec {
680        message: optional_string(d, "message"),
681        fix: optional_string(d, "fix"),
682        safety: optional_string(d, "safety").and_then(|s| parse_safety(&s)),
683        severity: optional_string(d, "severity").and_then(|s| parse_severity(&s)),
684    }
685}
686
687fn parse_severity(s: &str) -> Option<Severity> {
688    match s {
689        "info" => Some(Severity::Info),
690        "warning" => Some(Severity::Warning),
691        "error" => Some(Severity::Error),
692        _ => None,
693    }
694}
695
696fn parse_safety(s: &str) -> Option<Safety> {
697    match s {
698        "format-only" => Some(Safety::FormatOnly),
699        "behavior-preserving" => Some(Safety::BehaviorPreserving),
700        "scope-local" => Some(Safety::ScopeLocal),
701        "surface-changing" => Some(Safety::SurfaceChanging),
702        "capability-changing" => Some(Safety::CapabilityChanging),
703        "needs-human" => Some(Safety::NeedsHuman),
704        _ => None,
705    }
706}
707
708fn json_to_vm(value: &serde_json::Value) -> VmValue {
709    match value {
710        serde_json::Value::Null => VmValue::Nil,
711        serde_json::Value::Bool(b) => VmValue::Bool(*b),
712        serde_json::Value::Number(n) => n
713            .as_i64()
714            .map(VmValue::Int)
715            .unwrap_or_else(|| VmValue::Float(n.as_f64().unwrap_or(0.0))),
716        serde_json::Value::String(s) => str_vm(s),
717        serde_json::Value::Array(items) => {
718            VmValue::List(Arc::new(items.iter().map(json_to_vm).collect()))
719        }
720        serde_json::Value::Object(map) => VmValue::Dict(Arc::new(
721            map.iter()
722                .map(|(k, v)| (k.clone(), json_to_vm(v)))
723                .collect(),
724        )),
725    }
726}
727
728// ---------------------------------------------------------------------------
729// Minimal arg/value helpers (harn-hostlib's `tools::args` is crate-private)
730// ---------------------------------------------------------------------------
731
732fn first_dict(
733    builtin: &'static str,
734    args: &[VmValue],
735) -> Result<Arc<BTreeMap<String, VmValue>>, HostlibError> {
736    match args.first() {
737        Some(VmValue::Dict(dict)) => Ok(dict.clone()),
738        Some(VmValue::Nil) | None => Ok(Arc::new(BTreeMap::new())),
739        Some(_) => Err(HostlibError::InvalidParameter {
740            builtin,
741            param: "params",
742            message: "expected a dict argument".into(),
743        }),
744    }
745}
746
747fn require_string(
748    builtin: &'static str,
749    dict: &BTreeMap<String, VmValue>,
750    key: &'static str,
751) -> Result<String, HostlibError> {
752    match dict.get(key) {
753        Some(VmValue::String(s)) => Ok(s.to_string()),
754        _ => Err(HostlibError::MissingParameter {
755            builtin,
756            param: key,
757        }),
758    }
759}
760
761fn optional_string(dict: &BTreeMap<String, VmValue>, key: &str) -> Option<String> {
762    match dict.get(key) {
763        Some(VmValue::String(s)) => Some(s.to_string()),
764        _ => None,
765    }
766}
767
768fn optional_string_list(dict: &BTreeMap<String, VmValue>, key: &str) -> Vec<String> {
769    match dict.get(key) {
770        Some(VmValue::List(items)) => items
771            .iter()
772            .filter_map(|v| match v {
773                VmValue::String(s) => Some(s.to_string()),
774                _ => None,
775            })
776            .collect(),
777        _ => Vec::new(),
778    }
779}
780
781fn optional_bool(dict: &BTreeMap<String, VmValue>, key: &str, default: bool) -> bool {
782    match dict.get(key) {
783        Some(VmValue::Bool(b)) => *b,
784        _ => default,
785    }
786}
787
788fn str_vm(s: impl AsRef<str>) -> VmValue {
789    VmValue::String(Arc::from(s.as_ref()))
790}
791
792fn dict_vm<const N: usize>(entries: [(&str, VmValue); N]) -> VmValue {
793    let map: BTreeMap<String, VmValue> = entries
794        .into_iter()
795        .map(|(k, v)| (k.to_string(), v))
796        .collect();
797    VmValue::Dict(Arc::new(map))
798}
799
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `VmValue::Dict` from borrowed `(key, value)` pairs, cloning
    /// each value so fixtures can be reused across calls.
    fn dict(pairs: &[(&str, VmValue)]) -> VmValue {
        let map: BTreeMap<String, VmValue> = pairs
            .iter()
            .map(|(k, v)| (k.to_string(), v.clone()))
            .collect();
        VmValue::Dict(Arc::new(map))
    }

    /// Looks up `key` in a dict value; panics when `v` is not a dict or the
    /// key is absent.
    fn get<'a>(v: &'a VmValue, key: &str) -> &'a VmValue {
        match v {
            VmValue::Dict(d) => d.get(key).unwrap_or_else(|| panic!("missing {key}")),
            _ => panic!("not a dict"),
        }
    }

    /// Looks up `key` in a dict value and borrows it as a list; panics (with
    /// the offending value) when the entry is not a `VmValue::List`.
    fn list_of<'a>(v: &'a VmValue, key: &str) -> &'a [VmValue] {
        match get(v, key) {
            VmValue::List(items) => items.as_slice(),
            other => panic!("{key} is not a list: {other:?}"),
        }
    }

    /// Unwraps a `VmValue::Int`; panics on any other variant.
    fn int(v: &VmValue) -> i64 {
        match v {
            VmValue::Int(i) => *i,
            other => panic!("not int: {other:?}"),
        }
    }

    /// Unwraps a `VmValue::String` into an owned `String`; panics otherwise.
    fn s(v: &VmValue) -> String {
        match v {
            VmValue::String(s) => s.to_string(),
            other => panic!("not string: {other:?}"),
        }
    }

    /// Unwraps a `VmValue::Bool`; panics on any other variant.
    fn b(v: &VmValue) -> bool {
        match v {
            VmValue::Bool(b) => *b,
            other => panic!("not bool: {other:?}"),
        }
    }

    /// Minimal TypeScript rule matching any zero-argument call, capturing the
    /// callee as `$FN`.
    const SEARCH_RULE: &str = r#"
        id = "find-calls"
        language = "typescript"
        [rule]
        pattern = "$FN()"
    "#;

    #[test]
    fn search_returns_matches_with_captures() {
        let result = search_run(&[dict(&[
            ("rule", str_vm(SEARCH_RULE)),
            ("source", str_vm("foo();\nbar();\n")),
            ("language", str_vm("typescript")),
        ])])
        .unwrap();
        assert_eq!(int(get(&result, "match_count")), 2);
        let matches = list_of(&result, "matches");
        assert_eq!(s(get(get(&matches[0], "captures"), "FN")), "foo");
    }

    #[test]
    fn search_returns_harn_capture_metadata() {
        let rule = r#"
            id = "int-logs"
            language = "harn"
            [rule]
            pattern = "log($VALUE)"
        "#;
        let result = search_run(&[dict(&[
            ("rule", str_vm(rule)),
            (
                "source",
                str_vm("fn main() {\n  let count: int = 1\n  log(count)\n}\n"),
            ),
            ("language", str_vm("harn")),
        ])])
        .unwrap();
        let matches = list_of(&result, "matches");
        let metadata = get(get(&matches[0], "capture_metadata"), "VALUE");
        assert_eq!(s(get(metadata, "type")), "int");
        assert_eq!(s(get(get(metadata, "resolved"), "name")), "count");
        assert_eq!(s(get(get(metadata, "resolved"), "kind")), "let");
    }

    #[test]
    fn report_returns_a_data_table() {
        let result = report_run(&[dict(&[
            ("rule", str_vm(SEARCH_RULE)),
            ("source", str_vm("foo();\nbar();\n")),
            ("language", str_vm("typescript")),
            ("path", str_vm("a.ts")),
        ])])
        .unwrap();
        assert_eq!(int(get(get(&result, "summary"), "total_rows")), 2);
        assert_eq!(s(get(&result, "rule_id")), "find-calls");
    }

    #[test]
    fn apply_dry_run_previews_without_writing() {
        let rule = r#"
            id = "rename"
            language = "typescript"
            safety = "behavior-preserving"
            fix = "bar()"
            [rule]
            pattern = "foo()"
        "#;
        let result = apply_run(&[dict(&[
            ("rule", str_vm(rule)),
            ("source", str_vm("foo();\n")),
            ("language", str_vm("typescript")),
            ("dry_run", VmValue::Bool(true)),
        ])])
        .unwrap();
        let files = list_of(&result, "files");
        // A dry run reports the change and its preview but must not apply it.
        assert!(b(get(&files[0], "changed")));
        assert!(!b(get(&files[0], "applied")));
        assert_eq!(s(get(&files[0], "preview")), "bar();\n");
    }

    /// Harn codemod whose fix template is intentionally unformatted, used to
    /// observe whether the formatting post-pass ran.
    const UGLY_HARN_CODEMOD: &str = r#"
        id = "dd"
        language = "harn"
        safety = "scope-local"
        fix = "let {$K=$D}=$X"
        [rule]
        pattern = "let $K = $X?.$K ?? $D"
    "#;

    #[test]
    fn apply_formats_harn_output_by_default() {
        // The fix template is deliberately ugly; the #2847 fmt post-pass
        // normalizes the rewritten `.harn` (so a batch lands fmt-stable).
        let result = apply_run(&[dict(&[
            ("rule", str_vm(UGLY_HARN_CODEMOD)),
            (
                "source",
                str_vm("fn main() {\n  let timeout = cfg?.timeout ?? 30\n}\n"),
            ),
            ("language", str_vm("harn")),
            ("dry_run", VmValue::Bool(true)),
        ])])
        .unwrap();
        let files = list_of(&result, "files");
        assert!(b(get(&files[0], "changed")));
        assert!(b(get(&files[0], "formatted")));
        let preview = s(get(&files[0], "preview"));
        assert!(preview.contains("= 30"), "preview not formatted: {preview}");
    }

    #[test]
    fn apply_format_false_leaves_raw_output() {
        let result = apply_run(&[dict(&[
            ("rule", str_vm(UGLY_HARN_CODEMOD)),
            (
                "source",
                str_vm("fn main() {\n  let timeout = cfg?.timeout ?? 30\n}\n"),
            ),
            ("language", str_vm("harn")),
            ("dry_run", VmValue::Bool(true)),
            ("format", VmValue::Bool(false)),
        ])])
        .unwrap();
        let files = list_of(&result, "files");
        assert!(!b(get(&files[0], "formatted")));
        let preview = s(get(&files[0], "preview"));
        assert!(preview.contains("{timeout=30}"), "expected raw: {preview}");
    }

    #[test]
    fn diagnostics_returns_lint_findings() {
        let lint = r#"
            id = "calls"
            language = "typescript"
            message = "function call"
            [rule]
            pattern = "$FN()"
        "#;
        let result = diagnostics_run(&[dict(&[
            ("rule", str_vm(lint)),
            ("source", str_vm("foo();\nbar();\n")),
            ("language", str_vm("typescript")),
            ("path", str_vm("a.ts")),
        ])])
        .unwrap();
        assert_eq!(int(get(&result, "diagnostic_count")), 2);
        let diags = list_of(&result, "diagnostics");
        assert_eq!(s(get(&diags[0], "message")), "function call");
        assert_eq!(s(get(&diags[0], "severity")), "warning");
        // No `fix` and default safety → a suggestion, not machine-applicable.
        assert_eq!(s(get(&diags[0], "applicability")), "suggestion");
        assert_eq!(int(get(&diags[1], "start_row")), 1);
        assert!(matches!(get(&diags[0], "fix"), VmValue::Nil));
    }

    #[test]
    fn report_helpers_round_trip_severity_and_safety() {
        // The string<->enum mapping used by `rules.visit` reports.
        assert_eq!(parse_severity("error"), Some(Severity::Error));
        assert_eq!(parse_severity("bogus"), None);
        assert_eq!(parse_safety("format-only"), Some(Safety::FormatOnly));
        assert_eq!(parse_safety("needs-human"), Some(Safety::NeedsHuman));
        assert_eq!(parse_safety("nope"), None);
        // `true` flags with defaults; nil/false skip; a dict carries fields.
        assert_eq!(reports_from_return(VmValue::Bool(true)).len(), 1);
        assert_eq!(reports_from_return(VmValue::Nil).len(), 0);
        assert_eq!(reports_from_return(VmValue::Bool(false)).len(), 0);
        let list = VmValue::List(Arc::new(vec![
            dict(&[("message", str_vm("a"))]),
            VmValue::Nil,
            dict(&[("message", str_vm("b"))]),
        ]));
        assert_eq!(reports_from_return(list).len(), 2);
    }

    #[test]
    fn capability_does_not_register_the_async_visitor() {
        // `rules.visit` is async, so it is installed directly on the VM in
        // `install`, not through the sync capability registry.
        let mut registry = BuiltinRegistry::new();
        RulesCapability.register_builtins(&mut registry);
        let names: Vec<_> = registry.iter().map(|b| b.name).collect();
        assert!(!names.contains(&VISIT));
        assert!(names.contains(&DIAGNOSTICS));
    }

    #[test]
    fn missing_rule_is_an_error() {
        let err = search_run(&[dict(&[
            ("source", str_vm("x")),
            ("language", str_vm("rust")),
        ])]);
        assert!(matches!(
            err,
            Err(HostlibError::MissingParameter { param: "rule", .. })
        ));
    }

    #[test]
    fn capability_registers_the_sync_builtins() {
        let mut registry = BuiltinRegistry::new();
        RulesCapability.register_builtins(&mut registry);
        let names: Vec<_> = registry.iter().map(|b| b.name).collect();
        assert_eq!(names, vec![SEARCH, REPORT, DIAGNOSTICS, APPLY, FOLD]);
    }

    #[test]
    fn lint_capability_registers_run() {
        let mut registry = BuiltinRegistry::new();
        LintCapability.register_builtins(&mut registry);
        let names: Vec<_> = registry.iter().map(|b| b.name).collect();
        assert_eq!(names, vec![LINT_RUN]);
    }

    #[test]
    fn lint_run_returns_the_linter_findings() {
        let result =
            lint_run(&[dict(&[("source", str_vm("fn f() {\n  let x = (1)\n}\n"))])]).unwrap();
        assert_eq!(s(get(&result, "result")), "ok");
        let diags = list_of(&result, "diagnostics");
        assert!(
            diags
                .iter()
                .any(|d| s(get(d, "rule")) == "unnecessary-parentheses"),
            "expected unnecessary-parentheses, got {diags:?}"
        );
    }

    #[test]
    fn lint_run_applies_a_severity_override() {
        let result = lint_run(&[dict(&[
            ("source", str_vm("fn f() {\n  let x = (1)\n}\n")),
            (
                "severity",
                dict(&[("unnecessary-parentheses", str_vm("error"))]),
            ),
        ])])
        .unwrap();
        let diags = list_of(&result, "diagnostics");
        let d = diags
            .iter()
            .find(|d| s(get(d, "rule")) == "unnecessary-parentheses")
            .expect("rule present");
        assert_eq!(s(get(d, "severity")), "error");
    }
}