Skip to main content

harn_rules/
engine.rs

1//! Compile a [`Rule`] into a runnable matcher and run it against source.
2//!
3//! The atomic tier supports three matcher forms, all reduced to a single
4//! [`RuleMatch`] stream:
5//!
6//! - `pattern` → compiled to a tree-sitter query via [`crate::pattern`].
7//! - `kind` → the trivial query `(<kind>) @__match`.
8//! - `regex` → a text regex over the source, yielding spans with no AST
9//!   metavar bindings.
10
11use std::collections::BTreeMap;
12
13use harn_hostlib::ast::Language;
14
15use crate::constraint::CompiledConstraint;
16use crate::error::RulesError;
17use crate::evaluator::CompiledRuleTree;
18use crate::fix::{interpolate, splice, AppliedEdit};
19use crate::model::{Applicability, Rule, Safety, Severity};
20use crate::transform::CompiledTransform;
21
/// A byte + row/col span. Rows/cols are 0-based, matching the rest of the
/// Harn AST wire format.
///
/// `start_byte..end_byte` is a half-open byte range into the source text.
/// NOTE(review): spans built from tree-sitter nodes copy their columns
/// straight from tree-sitter positions — confirm every other producer of a
/// `Span` uses the same column unit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Start byte offset.
    pub start_byte: usize,
    /// End byte offset (exclusive).
    pub end_byte: usize,
    /// 0-based start row.
    pub start_row: usize,
    /// 0-based start column.
    pub start_col: usize,
    /// 0-based end row.
    pub end_row: usize,
    /// 0-based end column.
    pub end_col: usize,
}
39
40impl Span {
41    pub(crate) fn of(node: tree_sitter::Node<'_>) -> Self {
42        let start = node.start_position();
43        let end = node.end_position();
44        Span {
45            start_byte: node.start_byte(),
46            end_byte: node.end_byte(),
47            start_row: start.row,
48            start_col: start.column,
49            end_row: end.row,
50            end_col: end.column,
51        }
52    }
53}
54
/// A metavariable binding: the captured text plus where it lives.
///
/// Stored in [`RuleMatch::bindings`], keyed by the metavar name.
#[derive(Debug, Clone)]
pub struct Binding {
    /// The captured source text.
    pub text: String,
    /// The captured node's span.
    pub span: Span,
}
63
/// One match of a rule against a file.
///
/// This is the common shape every matcher form is reduced to; it is what
/// [`CompiledRule::run`] returns.
#[derive(Debug, Clone)]
pub struct RuleMatch {
    /// The rule that produced this match.
    pub rule_id: String,
    /// The whole matched range (the pattern root, or the regex span).
    pub span: Span,
    /// The matched source text.
    pub text: String,
    /// Metavar bindings, keyed by name (without the leading `$`). Empty for
    /// `kind` and `regex` matchers.
    pub bindings: BTreeMap<String, Binding>,
}
77
/// The result of applying a codemod rule's `fix` to a source string.
///
/// Returned by [`CompiledRule::apply`], [`CompiledRule::auto_apply`] and
/// [`CompiledRule::apply_checked`].
#[derive(Debug, Clone)]
pub struct CodemodResult {
    /// The rewritten source (equals the input when nothing matched).
    pub rewritten: String,
    /// The per-match edits that were spliced in, in document order.
    pub edits: Vec<AppliedEdit>,
    /// Whether the rewrite changed the source (`rewritten` differs from the
    /// input text).
    pub changed: bool,
    /// The rule's declared safety tier.
    pub safety: Safety,
    /// Whether the fix may be auto-applied or is opt-in only.
    pub applicability: Applicability,
    /// Whether re-running the fix on `rewritten` yields no further change
    /// (a fix should reach a fixed point).
    pub idempotent: bool,
}
95
/// A rule whose matcher has been compiled and is ready to run.
///
/// Built with [`CompiledRule::compile`]; all fields are private and exposed
/// through accessor methods where needed.
pub struct CompiledRule {
    /// The id of the rule this was compiled from; stamped on every match
    /// and diagnostic.
    rule_id: String,
    /// The language resolved from the rule's `language` name.
    language: Language,
    /// How the rule is matched: a raw-text regex scan or the tree algebra.
    execution: Execution,
    /// `where` predicates; a match survives only when all hold.
    constraints: Vec<CompiledConstraint>,
    /// `transform` definitions: (new metavar name, compiled transform).
    transforms: Vec<(String, CompiledTransform)>,
    /// The `fix` replacement template, if this is a codemod.
    fix: Option<String>,
    /// The fix's safety tier (gates auto-apply).
    safety: Safety,
    /// The diagnostic message (empty for search-only rules).
    message: String,
    /// The diagnostic severity.
    severity: Severity,
}
114
/// A diagnostic produced by running a rule — the mapping surface onto the
/// linter's `LintDiagnostic` / `FixEdit` (Epic C / the LSP reuse this).
///
/// [`CompiledRule::diagnostics`] emits one of these per surviving match.
#[derive(Debug, Clone)]
pub struct Diagnostic {
    /// The rule id (also the diagnostic code).
    pub rule_id: String,
    /// The diagnostic message.
    pub message: String,
    /// The severity.
    pub severity: Severity,
    /// The flagged span.
    pub span: Span,
    /// Whether a fix, if present, is auto-applicable or a suggestion.
    pub applicability: Applicability,
    /// The interpolated fix replacement for this match, if the rule has a
    /// `fix`; `None` for rules without one.
    pub fix: Option<String>,
}
133
/// How a compiled rule finds its matches. The variant is chosen once, in
/// `CompiledRule::compile`, from the shape of the rule.
enum Execution {
    /// A top-level pure-`regex` rule: scan the raw source text (grep-style),
    /// independent of the tree. Its match span is the regex match range.
    SourceRegex(regex::Regex),
    /// The full matching algebra (atomic + relational + composite).
    /// Boxed so the enum stays small regardless of the tree's size.
    Tree(Box<CompiledRuleTree>),
}
141
142impl CompiledRule {
143    /// Resolve the rule's language and grammar, then compile its matcher.
144    pub fn compile(rule: &Rule) -> Result<Self, RulesError> {
145        let language =
146            Language::from_name(&rule.language).ok_or_else(|| RulesError::UnknownLanguage {
147                rule: rule.id.clone(),
148                language: rule.language.clone(),
149            })?;
150
151        // A top-level pure-`regex` rule greps the source text directly; any
152        // other shape (pattern / kind / relational / composite) compiles to
153        // the tree-walking algebra.
154        let execution = if rule.rule.is_pure_regex() {
155            let pattern = rule.rule.regex.as_ref().expect("pure regex");
156            Execution::SourceRegex(regex::Regex::new(pattern).map_err(|err| {
157                RulesError::PatternCompile {
158                    rule: rule.id.clone(),
159                    message: format!("invalid regex `{pattern}`: {err}"),
160                }
161            })?)
162        } else {
163            Execution::Tree(Box::new(CompiledRuleTree::compile(
164                &rule.id,
165                language,
166                &rule.rule,
167                &rule.utils,
168            )?))
169        };
170
171        let constraints = rule
172            .where_constraints
173            .iter()
174            .map(|c| CompiledConstraint::compile(&rule.id, language, c))
175            .collect::<Result<Vec<_>, _>>()?;
176
177        let transforms = rule
178            .transform
179            .iter()
180            .map(|(name, t)| {
181                CompiledTransform::compile(&rule.id, name, t).map(|c| (name.clone(), c))
182            })
183            .collect::<Result<Vec<_>, _>>()?;
184
185        Ok(CompiledRule {
186            rule_id: rule.id.clone(),
187            language,
188            execution,
189            constraints,
190            transforms,
191            fix: rule.fix.clone(),
192            safety: rule.safety,
193            message: rule.message.clone(),
194            severity: rule.severity,
195        })
196    }
197
    /// The language this rule targets, as resolved from the rule's
    /// `language` name when it was compiled.
    pub fn language(&self) -> Language {
        self.language
    }
202
    /// The fix's declared safety tier, copied from the rule definition.
    pub fn safety(&self) -> Safety {
        self.safety
    }
207
    /// Whether this rule's fix may be auto-applied (machine-applicable) or
    /// is opt-in only (suggestion). Derived from the rule's safety tier.
    pub fn applicability(&self) -> Applicability {
        self.safety.applicability()
    }
213
    /// The rule's id, borrowed from the compiled rule.
    pub fn id(&self) -> &str {
        &self.rule_id
    }
218
219    /// Run the compiled rule against `source`, returning matches in
220    /// document order. Matches that fail any `where` constraint are dropped.
221    pub fn run(&self, source: &str) -> Result<Vec<RuleMatch>, RulesError> {
222        let mut matches = match &self.execution {
223            Execution::SourceRegex(regex) => self.run_regex(regex, source),
224            Execution::Tree(tree) => tree
225                .find(&self.rule_id, self.language, source)?
226                .into_iter()
227                .map(|m| RuleMatch {
228                    rule_id: self.rule_id.clone(),
229                    span: m.span,
230                    text: m.text,
231                    bindings: m.bindings,
232                })
233                .collect(),
234        };
235        if !self.constraints.is_empty() {
236            matches.retain(|m| self.satisfies_constraints(m));
237        }
238        Ok(matches)
239    }
240
241    /// True when every `where` constraint holds for this match. A
242    /// constraint whose metavar is unbound (not captured) fails closed.
243    fn satisfies_constraints(&self, m: &RuleMatch) -> bool {
244        self.constraints.iter().all(|c| {
245            m.bindings
246                .get(&c.metavar)
247                .is_some_and(|b| c.evaluate(&b.text))
248        })
249    }
250
251    /// Apply this codemod rule's `fix` to `source`, returning the rewritten
252    /// text plus the per-match edits. Each match's `fix` template is
253    /// interpolated from its captured metavars plus any `transform`-derived
254    /// ones. Errors if the rule has no `fix`.
255    ///
256    /// This computes the preview only — it does not enforce the safety gate.
257    /// Use [`CompiledRule::auto_apply`] to refuse non-machine-applicable
258    /// fixes, or [`CompiledRule::apply_checked`] to also assert idempotency.
259    pub fn apply(&self, source: &str) -> Result<CodemodResult, RulesError> {
260        let (rewritten, edits) = self.rewrite(source)?;
261        let changed = rewritten != source;
262        // Idempotency: re-running the fix on its own output must not change
263        // it further (the fix should reach a fixed point).
264        let (twice, _) = self.rewrite(&rewritten)?;
265        let idempotent = twice == rewritten;
266        Ok(CodemodResult {
267            rewritten,
268            edits,
269            changed,
270            safety: self.safety,
271            applicability: self.applicability(),
272            idempotent,
273        })
274    }
275
276    /// Like [`CompiledRule::apply`], but refuses to apply a fix whose
277    /// `safety` is above the machine-applicable threshold (`scope-local` and
278    /// riskier). This is the gate `harn codemod --apply` uses by default.
279    pub fn auto_apply(&self, source: &str) -> Result<CodemodResult, RulesError> {
280        if !self.safety.is_auto_applicable() {
281            return Err(RulesError::NotAutoApplicable {
282                rule: self.rule_id.clone(),
283                safety: format!("{:?}", self.safety),
284            });
285        }
286        self.apply(source)
287    }
288
289    /// Like [`CompiledRule::apply`], but fails if the fix is not idempotent.
290    /// Used by the codemod runner and the rule-test harness to assert a fix
291    /// reaches a fixed point.
292    pub fn apply_checked(&self, source: &str) -> Result<CodemodResult, RulesError> {
293        let result = self.apply(source)?;
294        if !result.idempotent {
295            return Err(RulesError::NotIdempotent {
296                rule: self.rule_id.clone(),
297            });
298        }
299        Ok(result)
300    }
301
302    /// Run the rule and produce one [`Diagnostic`] per match — the surface
303    /// the linter (Epic C) and the LSP convert into `LintDiagnostic` /
304    /// `FixEdit`. Each diagnostic carries the interpolated fix (if any) and
305    /// its applicability tier.
306    pub fn diagnostics(&self, source: &str) -> Result<Vec<Diagnostic>, RulesError> {
307        let applicability = self.applicability();
308        let matches = self.run(source)?;
309        Ok(matches
310            .iter()
311            .map(|m| Diagnostic {
312                rule_id: self.rule_id.clone(),
313                message: self.message.clone(),
314                severity: self.severity,
315                span: m.span,
316                applicability,
317                fix: self.fix.as_ref().map(|template| {
318                    let vars = self.metavars_for(m);
319                    interpolate(template, &vars)
320                }),
321            })
322            .collect())
323    }
324
325    /// The core rewrite: run the rule and splice each match's interpolated
326    /// fix. Returns the rewritten text and the edits.
327    fn rewrite(&self, source: &str) -> Result<(String, Vec<AppliedEdit>), RulesError> {
328        let template = self
329            .fix
330            .as_ref()
331            .ok_or_else(|| RulesError::PatternCompile {
332                rule: self.rule_id.clone(),
333                message: "apply requires a `fix` template; this rule has none".into(),
334            })?;
335
336        let matches = self.run(source)?;
337        let edits: Vec<AppliedEdit> = matches
338            .iter()
339            .map(|m| {
340                let vars = self.metavars_for(m);
341                AppliedEdit {
342                    span: m.span,
343                    before: m.text.clone(),
344                    replacement: interpolate(template, &vars),
345                }
346            })
347            .collect();
348        Ok((splice(source, &edits), edits))
349    }
350
351    /// Build the full metavar map for a match: captured bindings plus the
352    /// `transform`-synthesized metavars (which may shadow captures).
353    fn metavars_for(&self, m: &RuleMatch) -> BTreeMap<String, String> {
354        let mut vars: BTreeMap<String, String> = m
355            .bindings
356            .iter()
357            .map(|(name, binding)| (name.clone(), binding.text.clone()))
358            .collect();
359        for (name, transform) in &self.transforms {
360            let input = m
361                .bindings
362                .get(&transform.source)
363                .map(|b| b.text.as_str())
364                .unwrap_or("");
365            vars.insert(name.clone(), transform.apply(input));
366        }
367        vars
368    }
369
370    fn run_regex(&self, regex: &regex::Regex, source: &str) -> Vec<RuleMatch> {
371        let mut matches = Vec::new();
372        for m in regex.find_iter(source) {
373            let span = byte_span(source, m.start(), m.end());
374            matches.push(RuleMatch {
375                rule_id: self.rule_id.clone(),
376                span,
377                text: m.as_str().to_string(),
378                bindings: BTreeMap::new(),
379            });
380        }
381        matches
382    }
383}
384
385/// Compute a [`Span`] for a byte range by counting rows/cols. Used by the
386/// regex matcher, which has no tree-sitter node to read positions from.
387fn byte_span(source: &str, start: usize, end: usize) -> Span {
388    let (start_row, start_col) = row_col(source, start);
389    let (end_row, end_col) = row_col(source, end);
390    Span {
391        start_byte: start,
392        end_byte: end,
393        start_row,
394        start_col,
395        end_row,
396        end_col,
397    }
398}
399
/// Translate a byte offset into a 0-based `(row, column)` position.
///
/// The row is the number of `\n` bytes before `byte`; the column is the
/// number of **bytes** between the start of that row and `byte`. Columns are
/// counted in bytes (not chars) so that spans produced by the regex matcher
/// use the same unit as spans read from tree-sitter nodes, whose columns are
/// byte offsets within the row. For ASCII text the two units coincide.
///
/// An offset past the end of `source` is clamped to the end.
fn row_col(source: &str, byte: usize) -> (usize, usize) {
    let prefix = &source.as_bytes()[..byte.min(source.len())];
    let row = prefix.iter().filter(|&&b| b == b'\n').count();
    // The row starts just after the last newline before `byte`, or at the
    // beginning of the text when there is none.
    let line_start = prefix
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    (row, prefix.len() - line_start)
}
416
// Unit tests: one per matcher form (pattern / kind / regex) plus the two
// compile-time failure paths.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Rule;

    /// Parse a TOML rule definition and compile it, panicking on either
    /// failure so each test can focus on matching behavior.
    fn rule(toml: &str) -> CompiledRule {
        let parsed = Rule::from_toml_str(toml).expect("rule parses");
        CompiledRule::compile(&parsed).expect("rule compiles")
    }

    /// A `pattern` rule yields one match per occurrence and binds each
    /// metavar to the text it captured.
    #[test]
    fn pattern_rule_binds_metavars() {
        let compiled = rule(
            r#"
            id = "destructure-default"
            language = "typescript"
            fix = "{ $KEY: $SRC }"
            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#,
        );
        let matches = compiled
            .run("const a = cfg?.timeout ?? 30;\nconst b = opts?.retries ?? 3;\n")
            .unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].bindings["SRC"].text, "cfg");
        assert_eq!(matches[0].bindings["KEY"].text, "timeout");
        assert_eq!(matches[0].bindings["DEFAULT"].text, "30");
        assert_eq!(matches[1].bindings["SRC"].text, "opts");
        // The match span covers the whole expression.
        assert_eq!(matches[0].text, "cfg?.timeout ?? 30");
        assert_eq!(matches[0].span.start_row, 0);
        assert_eq!(matches[1].span.start_row, 1);
    }

    /// A `kind` rule matches every node of that kind and binds no metavars.
    #[test]
    fn kind_rule_matches_node_kind() {
        let compiled = rule(
            r#"
            id = "find-calls"
            language = "python"
            [rule]
            kind = "call"
            "#,
        );
        let matches = compiled.run("print(x)\nlog(y)\n").unwrap();
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].text, "print(x)");
        assert!(matches[0].bindings.is_empty());
    }

    /// A pure `regex` rule scans the raw text: only the upper-case
    /// `TODO(name)` form matches, and its span reports the right row.
    #[test]
    fn regex_rule_matches_text() {
        let compiled = rule(
            r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO\\(\\w+\\)"
            "#,
        );
        let matches = compiled
            .run("fn f() {\n    // TODO(ken) fix\n    // todo lower\n}\n")
            .unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].text, "TODO(ken)");
        assert_eq!(matches[0].span.start_row, 1);
    }

    /// A rule naming a language the engine does not know parses fine but is
    /// rejected at compile time.
    #[test]
    fn unknown_language_is_an_error() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "x"
            language = "cobol"
            [rule]
            kind = "foo"
            "#,
        )
        .unwrap();
        assert!(matches!(
            CompiledRule::compile(&parsed),
            Err(RulesError::UnknownLanguage { .. })
        ));
    }

    /// A pattern the pattern compiler rejects surfaces as `PatternCompile`.
    /// NOTE(review): presumably the `$$$ARGS` form is what makes this
    /// pattern invalid — confirm against `crate::pattern`.
    #[test]
    fn invalid_pattern_surfaces_compile_error() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "x"
            language = "typescript"
            [rule]
            pattern = "foo($$$ARGS)"
            "#,
        )
        .unwrap();
        assert!(matches!(
            CompiledRule::compile(&parsed),
            Err(RulesError::PatternCompile { .. })
        ));
    }
}