Skip to main content

harn_rules/
model.rs

1//! The declarative rule data model.
2//!
3//! A rule is the atomic unit the engine consumes: an identity (`id`,
4//! `language`, `severity`, `message`), a `rule` block describing *what to
5//! match* (the atomic tier: `pattern` snippet, `kind`, or `regex`), and an
6//! optional `fix` describing *how to rewrite* it. Relational/composite
7//! matching (#2833) and `where`/`transform` (#2834) extend this model;
8//! this module is the atomic-tier surface they build on.
9
10use std::collections::BTreeMap;
11
12use serde::Deserialize;
13
14/// Diagnostic severity. Mirrors the `harn-lint` vocabulary so findings can
15/// flow into the same reporting surface.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
17#[serde(rename_all = "lowercase")]
18pub enum Severity {
19    /// Informational; no action required.
20    Info,
21    /// Default — something worth a human's attention.
22    #[default]
23    Warning,
24    /// A problem that should block.
25    Error,
26}
27
28impl Severity {
29    /// The stable lowercase name (the inverse of the `Deserialize` rename),
30    /// used for diagnostics and JSON surfaces.
31    pub fn as_str(self) -> &'static str {
32        match self {
33            Severity::Info => "info",
34            Severity::Warning => "warning",
35            Severity::Error => "error",
36        }
37    }
38}
39
40/// How risky a rule's `fix` is, mapped onto Burin's edit-safety taxonomy.
41/// Ordered least → most dangerous; the codemod runner auto-applies only the
42/// two safest tiers (see [`Safety::applicability`]).
43#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Deserialize)]
44#[serde(rename_all = "kebab-case")]
45pub enum Safety {
46    /// Whitespace / formatting only.
47    FormatOnly,
48    /// Semantics-preserving rewrite.
49    BehaviorPreserving,
50    /// Changes behavior, but only within the matched scope. **Default** —
51    /// conservative, so an undeclared codemod does not silently auto-apply.
52    #[default]
53    ScopeLocal,
54    /// Changes an externally-visible surface (a signature, an export).
55    SurfaceChanging,
56    /// Changes capabilities / effects (I/O, permissions).
57    CapabilityChanging,
58    /// Always requires a human in the loop.
59    NeedsHuman,
60}
61
/// Classifies a fix as either auto-appliable (what clippy/ESLint call
/// `machine-applicable`) or opt-in only (`suggestion`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Applicability {
    /// May be applied automatically (`format-only` /
    /// `behavior-preserving` fixes).
    MachineApplicable,
    /// Shown as a preview; applied only on explicit opt-in.
    Suggestion,
}

impl Applicability {
    /// Returns the stable kebab-case name of this tier, for use in
    /// diagnostics and JSON surfaces.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::MachineApplicable => "machine-applicable",
            Self::Suggestion => "suggestion",
        }
    }
}
81
82impl Safety {
83    /// The stable kebab-case name (the inverse of the `Deserialize` rename),
84    /// used for diagnostics and JSON surfaces.
85    pub fn as_str(self) -> &'static str {
86        match self {
87            Safety::FormatOnly => "format-only",
88            Safety::BehaviorPreserving => "behavior-preserving",
89            Safety::ScopeLocal => "scope-local",
90            Safety::SurfaceChanging => "surface-changing",
91            Safety::CapabilityChanging => "capability-changing",
92            Safety::NeedsHuman => "needs-human",
93        }
94    }
95
96    /// The applicability tier this safety level maps to. `format-only` and
97    /// `behavior-preserving` are machine-applicable; everything riskier is a
98    /// suggestion.
99    pub fn applicability(self) -> Applicability {
100        if self <= Safety::BehaviorPreserving {
101            Applicability::MachineApplicable
102        } else {
103            Applicability::Suggestion
104        }
105    }
106
107    /// True when the runner may auto-apply this rule's fix without an
108    /// explicit opt-in.
109    pub fn is_auto_applicable(self) -> bool {
110        self.applicability() == Applicability::MachineApplicable
111    }
112}
113
/// The flavor of work a rule performs. It is never declared; it is derived
/// from the rule's shape: a `fix` makes it a codemod, a `message` without a
/// `fix` makes it a lint, and a bare matcher is a search.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
    /// Find-only: matches are reported with no diagnostic text and no
    /// rewrite.
    Search,
    /// Reports a diagnostic (`message` + `severity`) without rewriting.
    Lint,
    /// Rewrites matches using `fix`.
    Codemod,
}
126
127/// The atomic-tier matcher. Exactly one of `pattern` / `kind` / `regex`
128/// must be set on a node that carries one; [`RuleNode::atomic`] resolves it.
129///
130/// A `RuleNode` is the recursive matching algebra: an optional **atomic**
131/// leaf (`pattern` / `kind` / `regex`), **relational** constraints
132/// (`inside` / `has` / `follows` / `precedes`, each a sub-node tuned by
133/// `stop_by` / `field`), and **composite** combinators (`all` / `any` /
134/// `not` / `matches`). Every key set on a node is ANDed: the node matches a
135/// tree-sitter node iff its atomic part matches *and* every relational and
136/// composite part holds.
137#[derive(Debug, Clone, Default, Deserialize)]
138pub struct RuleNode {
139    /// A code snippet in the target grammar with `$VAR` metavariable holes.
140    pub pattern: Option<String>,
141    /// A bare tree-sitter node kind (e.g. `"call_expression"`).
142    pub kind: Option<String>,
143    /// A regular expression matched against node text.
144    pub regex: Option<String>,
145
146    /// The node must be **inside** a node matching this sub-rule (ancestor).
147    pub inside: Option<Box<RuleNode>>,
148    /// The node must **have** a descendant matching this sub-rule.
149    pub has: Option<Box<RuleNode>>,
150    /// The node must **follow** a node matching this sub-rule (earlier).
151    pub follows: Option<Box<RuleNode>>,
152    /// The node must **precede** a node matching this sub-rule (later).
153    pub precedes: Option<Box<RuleNode>>,
154
155    /// Relational reach (used when this node is an `inside`/`has`/… target):
156    /// `neighbor` (direct only, default), `end` (transitive), or a rule that
157    /// halts the walk. (TOML `stopBy` or `stop_by`.)
158    #[serde(default, alias = "stopBy")]
159    pub stop_by: Option<StopBy>,
160    /// Restrict an `inside`/`has` relation to a specific tree-sitter field.
161    pub field: Option<String>,
162
163    /// Every sub-rule must match the node.
164    pub all: Option<Vec<RuleNode>>,
165    /// At least one sub-rule must match the node.
166    pub any: Option<Vec<RuleNode>>,
167    /// The sub-rule must NOT match the node.
168    pub not: Option<Box<RuleNode>>,
169    /// Reference a utility rule by id (resolved from `[utils]`).
170    pub matches: Option<String>,
171}
172
173/// How far a relational op (`inside` / `has` / `follows` / `precedes`)
174/// walks the tree looking for a match.
175#[derive(Debug, Clone, Deserialize)]
176#[serde(untagged)]
177pub enum StopBy {
178    /// `"neighbor"` (direct parent/child/sibling only) or `"end"`
179    /// (transitive — walk to the tree boundary).
180    Keyword(StopKeyword),
181    /// Walk until a node matching this rule is reached, then stop.
182    Rule(Box<RuleNode>),
183}
184
185/// The keyword forms of [`StopBy`].
186#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
187#[serde(rename_all = "lowercase")]
188pub enum StopKeyword {
189    /// Only the immediate neighbor (default).
190    Neighbor,
191    /// Transitive — walk all the way to the tree boundary.
192    End,
193}
194
/// A node's atomic matcher after resolution: exactly one of the three
/// atomic forms, carrying its text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomicMatcher {
    /// A code snippet pattern containing metavariable holes.
    Pattern(String),
    /// The name of a tree-sitter node kind.
    Kind(String),
    /// A regular expression applied to node text.
    Regex(String),
}
205
206impl RuleNode {
207    /// Resolve this node's atomic leaf. `Ok(None)` when the node is purely
208    /// relational/composite; `Err` when more than one atomic key is set.
209    pub fn atomic(&self) -> Result<Option<AtomicMatcher>, String> {
210        let set: Vec<&str> = [
211            self.pattern.as_ref().map(|_| "pattern"),
212            self.kind.as_ref().map(|_| "kind"),
213            self.regex.as_ref().map(|_| "regex"),
214        ]
215        .into_iter()
216        .flatten()
217        .collect();
218        match set.as_slice() {
219            [] => Ok(None),
220            [one] => Ok(Some(match *one {
221                "pattern" => AtomicMatcher::Pattern(self.pattern.clone().unwrap()),
222                "kind" => AtomicMatcher::Kind(self.kind.clone().unwrap()),
223                _ => AtomicMatcher::Regex(self.regex.clone().unwrap()),
224            })),
225            many => Err(format!(
226                "rule node sets multiple atomic matchers ({}); set at most one",
227                many.join(", ")
228            )),
229        }
230    }
231
232    /// True when `regex` is the only key set — a top-level grep-style rule
233    /// that scans source text rather than the tree.
234    pub fn is_pure_regex(&self) -> bool {
235        self.regex.is_some()
236            && self.pattern.is_none()
237            && self.kind.is_none()
238            && self.inside.is_none()
239            && self.has.is_none()
240            && self.follows.is_none()
241            && self.precedes.is_none()
242            && self.all.is_none()
243            && self.any.is_none()
244            && self.not.is_none()
245            && self.matches.is_none()
246    }
247
248    /// True when the node sets no matching keys at all (an empty node, which
249    /// is a rule authoring error).
250    pub fn is_empty(&self) -> bool {
251        self.pattern.is_none()
252            && self.kind.is_none()
253            && self.regex.is_none()
254            && self.inside.is_none()
255            && self.has.is_none()
256            && self.follows.is_none()
257            && self.precedes.is_none()
258            && self.all.is_none()
259            && self.any.is_none()
260            && self.not.is_none()
261            && self.matches.is_none()
262    }
263}
264
265/// A single declarative rule.
266#[derive(Debug, Clone, Deserialize)]
267#[serde(deny_unknown_fields)]
268pub struct Rule {
269    /// Stable identifier (also the diagnostic code).
270    pub id: String,
271    /// Target language name (resolved via `harn_hostlib::ast::Language`).
272    pub language: String,
273    /// Diagnostic severity. Defaults to `warning`.
274    #[serde(default)]
275    pub severity: Severity,
276    /// Human-readable diagnostic message. Empty for search-only rules.
277    #[serde(default)]
278    pub message: String,
279    /// How risky the `fix` is. Gates auto-apply. Defaults to `scope-local`.
280    #[serde(default)]
281    pub safety: Safety,
282    /// The matcher block (atomic / relational / composite algebra).
283    pub rule: RuleNode,
284    /// Local utility rules referenced by `matches`, keyed by id.
285    /// (TOML `[utils.NAME]`.)
286    #[serde(default)]
287    pub utils: BTreeMap<String, RuleNode>,
288    /// Predicates on captured metavars; a match survives only when every
289    /// constraint holds. (TOML `[[where]]`.)
290    #[serde(default, rename = "where")]
291    pub where_constraints: Vec<Constraint>,
292    /// Derived metavars synthesized from captured ones before `fix`
293    /// interpolation, keyed by the new metavar name. (TOML `[transform.X]`.)
294    #[serde(default)]
295    pub transform: BTreeMap<String, Transform>,
296    /// Replacement template. Its presence makes the rule a codemod.
297    #[serde(default)]
298    pub fix: Option<String>,
299}
300
301/// A `where` predicate on a captured metavar. Exactly one of `regex` /
302/// `comparison` / `pattern` / `resolves_to` / `type` is set.
303#[derive(Debug, Clone, Deserialize)]
304#[serde(deny_unknown_fields)]
305pub struct Constraint {
306    /// The metavar this constraint applies to (without the leading `$`).
307    pub metavar: String,
308    /// The metavar's text must match this regex.
309    #[serde(default)]
310    pub regex: Option<String>,
311    /// The metavar's text, parsed as a number, must satisfy this
312    /// comparison (Semgrep `metavariable-comparison`).
313    #[serde(default)]
314    pub comparison: Option<Comparison>,
315    /// A sub-pattern (Semgrep `metavariable-pattern`) run against the
316    /// metavar's captured text; the constraint holds when it matches.
317    #[serde(default)]
318    pub pattern: Option<String>,
319    /// Harn-only semantic filter: the captured node must resolve to a
320    /// declaration/binding matching this identity.
321    #[serde(default, alias = "resolvesTo")]
322    pub resolves_to: Option<ResolvedBindingConstraint>,
323    /// Harn-only semantic filter: the capture's attributed type must equal
324    /// this label.
325    #[serde(default, rename = "type")]
326    pub type_: Option<String>,
327    /// Optional language override for `pattern` — lets a captured string
328    /// literal be matched in a different grammar than the host file.
329    #[serde(default)]
330    pub language: Option<String>,
331}
332
333/// A Harn resolved-binding predicate for [`Constraint::resolves_to`].
334#[derive(Debug, Clone, Deserialize)]
335#[serde(deny_unknown_fields)]
336pub struct ResolvedBindingConstraint {
337    /// Exact resolved id (`<kind>:<name>@<line>:<column>`), if supplied.
338    #[serde(default)]
339    pub id: Option<String>,
340    /// Binding/declaration name.
341    #[serde(default)]
342    pub name: Option<String>,
343    /// Binding kind (`fn`, `param`, `let`, ...).
344    #[serde(default)]
345    pub kind: Option<String>,
346    /// 1-based declaration/binding line.
347    #[serde(default)]
348    pub line: Option<usize>,
349    /// 1-based declaration/binding column.
350    #[serde(default)]
351    pub column: Option<usize>,
352}
353
354/// A numeric/string comparison for a [`Constraint`].
355#[derive(Debug, Clone, Deserialize)]
356#[serde(deny_unknown_fields)]
357pub struct Comparison {
358    /// One of `<` `<=` `>` `>=` `==` `!=`.
359    pub op: String,
360    /// The right-hand side. Numbers compare numerically; strings/bools
361    /// compare with `==` / `!=` only.
362    pub value: toml::Value,
363}
364
365/// A metavar transform: read `source`, apply exactly one operation, bind
366/// the result under a new metavar name (the map key).
367#[derive(Debug, Clone, Deserialize)]
368#[serde(deny_unknown_fields)]
369pub struct Transform {
370    /// The source metavar name (without `$`) whose text is transformed.
371    pub source: String,
372    /// Regex find/replace.
373    #[serde(default)]
374    pub replace: Option<ReplaceOp>,
375    /// A character-index slice.
376    #[serde(default)]
377    pub substring: Option<SubstringOp>,
378    /// A case conversion.
379    #[serde(default)]
380    pub convert: Option<ConvertOp>,
381}
382
383/// Regex find/replace transform op.
384#[derive(Debug, Clone, Deserialize)]
385#[serde(deny_unknown_fields)]
386pub struct ReplaceOp {
387    /// The regex to find.
388    pub regex: String,
389    /// The replacement (supports `$1` capture refs).
390    pub by: String,
391}
392
393/// Character-slice transform op. Indices are 0-based char offsets; a
394/// negative or omitted bound clamps to the string end.
395#[derive(Debug, Clone, Deserialize)]
396#[serde(deny_unknown_fields)]
397pub struct SubstringOp {
398    /// Inclusive start char index (default 0).
399    #[serde(default)]
400    pub start: Option<i64>,
401    /// Exclusive end char index (default: end of string).
402    #[serde(default)]
403    pub end: Option<i64>,
404}
405
406/// Case-conversion transform op.
407#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
408#[serde(rename_all = "snake_case")]
409pub enum ConvertOp {
410    /// `lowerCamelCase`.
411    LowerCamel,
412    /// `UpperCamelCase`.
413    UpperCamel,
414    /// `snake_case`.
415    Snake,
416    /// `SCREAMING_SNAKE_CASE`.
417    ScreamingSnake,
418    /// `kebab-case`.
419    Kebab,
420    /// `lowercase`.
421    Lower,
422    /// `UPPERCASE`.
423    Upper,
424}
425
426impl Rule {
427    /// Derive the rule's kind from its shape (see [`RuleKind`]).
428    pub fn kind(&self) -> RuleKind {
429        if self.fix.is_some() {
430            RuleKind::Codemod
431        } else if self.message.is_empty() {
432            RuleKind::Search
433        } else {
434            RuleKind::Lint
435        }
436    }
437
438    /// Parse a single rule from a TOML document.
439    pub fn from_toml_str(text: &str) -> Result<Self, Box<toml::de::Error>> {
440        toml::from_str(text).map_err(Box::new)
441    }
442}
443
#[cfg(test)]
mod tests {
    use super::*;

    // A rule carrying a `fix` parses into a codemod with a pattern matcher.
    #[test]
    fn parses_a_codemod_rule() {
        let toml = r#"
            id = "destructure-default"
            language = "typescript"
            severity = "warning"
            message = "Collapse optional-chain default into a destructuring bind"
            fix = "{ $KEY: $SRC }"

            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#;
        let parsed = Rule::from_toml_str(toml).expect("rule parses");

        assert_eq!(parsed.id, "destructure-default");
        assert_eq!(parsed.language, "typescript");
        assert_eq!(parsed.severity, Severity::Warning);
        assert_eq!(parsed.kind(), RuleKind::Codemod);

        let matcher = parsed.rule.atomic().unwrap();
        let expected = AtomicMatcher::Pattern("$SRC?.$KEY ?? $DEFAULT".into());
        assert_eq!(matcher, Some(expected));
    }

    // Omitting `severity` falls back to `warning`.
    #[test]
    fn severity_defaults_to_warning() {
        let toml = r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "macro_invocation"
            "#;
        let parsed = Rule::from_toml_str(toml).unwrap();

        assert_eq!(parsed.severity, Severity::Warning);
        // Neither a message nor a fix is present, so this is a search rule.
        assert_eq!(parsed.kind(), RuleKind::Search);
    }

    // A message without a fix yields a lint.
    #[test]
    fn lint_rule_has_message_no_fix() {
        let toml = r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO"
            "#;
        let parsed = Rule::from_toml_str(toml).unwrap();

        assert_eq!(parsed.kind(), RuleKind::Lint);

        let matcher = parsed.rule.atomic().unwrap();
        assert_eq!(matcher, Some(AtomicMatcher::Regex("TODO".into())));
    }

    // Two atomic keys on one node parse fine but fail to resolve.
    #[test]
    fn rejects_multiple_matchers() {
        let toml = r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "foo"
            regex = "bar"
            "#;
        let parsed = Rule::from_toml_str(toml).unwrap();

        let resolved = parsed.rule.atomic();
        assert!(resolved.is_err());
    }

    // An empty `[rule]` table parses, resolves to no atomic matcher
    // (`Ok(None)`), and is reported as empty.
    #[test]
    fn empty_matcher_is_detectable() {
        let toml = r#"
            id = "x"
            language = "rust"
            [rule]
            "#;
        let parsed = Rule::from_toml_str(toml).unwrap();

        let matcher = parsed.rule.atomic().unwrap();
        assert_eq!(matcher, None);
        assert!(parsed.rule.is_empty());
    }

    // Relational (`inside`) and composite (`not`) keys nest as sub-nodes.
    #[test]
    fn parses_relational_and_composite_keys() {
        let toml = r#"
            id = "nested"
            language = "typescript"
            [rule]
            pattern = "let $NAME = $INIT"
            [rule.inside]
            kind = "statement_block"
            stopBy = "end"
            [rule.not.inside]
            kind = "try_statement"
            stopBy = "end"
            "#;
        let parsed = Rule::from_toml_str(toml).expect("parses");
        let node = &parsed.rule;

        assert!(node.inside.is_some());
        assert!(node.not.is_some());

        let negated = node.not.as_ref().unwrap();
        assert!(negated.inside.is_some());
    }

    // `deny_unknown_fields` on `Rule` rejects stray top-level keys.
    #[test]
    fn rejects_unknown_top_level_field() {
        let toml = r#"
            id = "x"
            language = "rust"
            bogus = true
            [rule]
            kind = "foo"
            "#;
        let outcome = Rule::from_toml_str(toml);

        assert!(outcome.is_err());
    }

    // The Harn-only `resolvesTo` and `type` predicates parse from
    // `[[where]]` entries.
    #[test]
    fn parses_semantic_where_constraints() {
        let toml = r#"
            id = "x"
            language = "harn"
            [rule]
            pattern = "$FN($ARG)"

            [[where]]
            metavar = "FN"
            resolvesTo = { name = "target", kind = "fn", line = 1, column = 4 }

            [[where]]
            metavar = "ARG"
            type = "int"
            "#;
        let parsed = Rule::from_toml_str(toml).unwrap();
        let constraints = &parsed.where_constraints;

        assert_eq!(constraints.len(), 2);

        let binding = constraints[0].resolves_to.as_ref().unwrap();
        assert_eq!(binding.name.as_deref(), Some("target"));
        assert_eq!(binding.kind.as_deref(), Some("fn"));
        assert_eq!(binding.line, Some(1));
        assert_eq!(binding.column, Some(4));

        assert_eq!(constraints[1].type_.as_deref(), Some("int"));
    }
}