Skip to main content

harn_rules/
model.rs

1//! The declarative rule data model.
2//!
3//! A rule is the atomic unit the engine consumes: an identity (`id`,
4//! `language`, `severity`, `message`), a `rule` block describing *what to
5//! match* (the atomic tier: `pattern` snippet, `kind`, or `regex`), and an
6//! optional `fix` describing *how to rewrite* it. Relational/composite
7//! matching (#2833) and `where`/`transform` (#2834) extend this model;
8//! this module is the atomic-tier surface they build on.
9
10use std::collections::BTreeMap;
11
12use serde::Deserialize;
13
/// Diagnostic severity. Mirrors the `harn-lint` vocabulary so findings can
/// flow into the same reporting surface.
///
/// Deserialized from the lowercase variant name (`"info"`, `"warning"`,
/// `"error"`); [`Severity::as_str`] produces the same spelling. The
/// `Default` value is [`Severity::Warning`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Informational; no action required.
    Info,
    /// Default — something worth a human's attention.
    #[default]
    Warning,
    /// A problem that should block.
    Error,
}
27
28impl Severity {
29    /// The stable lowercase name (the inverse of the `Deserialize` rename),
30    /// used for diagnostics and JSON surfaces.
31    pub fn as_str(self) -> &'static str {
32        match self {
33            Severity::Info => "info",
34            Severity::Warning => "warning",
35            Severity::Error => "error",
36        }
37    }
38}
39
/// How risky a rule's `fix` is, mapped onto Burin's edit-safety taxonomy.
/// Ordered least → most dangerous; the codemod runner auto-applies only the
/// two safest tiers (see [`Safety::applicability`]).
///
/// The declaration order of the variants is load-bearing: the derived
/// `PartialOrd` / `Ord` follow it, and [`Safety::applicability`] relies on
/// that ordering. Insert new variants at the position matching their risk.
///
/// Deserialized from the kebab-case variant name (e.g. `"scope-local"`);
/// [`Safety::as_str`] produces the same spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Safety {
    /// Whitespace / formatting only.
    FormatOnly,
    /// Semantics-preserving rewrite.
    BehaviorPreserving,
    /// Changes behavior, but only within the matched scope. **Default** —
    /// conservative, so an undeclared codemod does not silently auto-apply.
    #[default]
    ScopeLocal,
    /// Changes an externally-visible surface (a signature, an export).
    SurfaceChanging,
    /// Changes capabilities / effects (I/O, permissions).
    CapabilityChanging,
    /// Always requires a human in the loop.
    NeedsHuman,
}
61
/// Whether a fix may be auto-applied (clippy/ESLint `machine-applicable`)
/// or is opt-in only (`suggestion`).
///
/// Derived from a rule's [`Safety`] via [`Safety::applicability`]; this
/// type does not derive `Deserialize`, so it is not read from rule files.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Applicability {
    /// Safe to auto-apply (`format-only` / `behavior-preserving`).
    MachineApplicable,
    /// Preview / opt-in only.
    Suggestion,
}
71
72impl Applicability {
73    /// The stable name used for diagnostics and JSON surfaces.
74    pub fn as_str(self) -> &'static str {
75        match self {
76            Applicability::MachineApplicable => "machine-applicable",
77            Applicability::Suggestion => "suggestion",
78        }
79    }
80}
81
82impl Safety {
83    /// The stable kebab-case name (the inverse of the `Deserialize` rename),
84    /// used for diagnostics and JSON surfaces.
85    pub fn as_str(self) -> &'static str {
86        match self {
87            Safety::FormatOnly => "format-only",
88            Safety::BehaviorPreserving => "behavior-preserving",
89            Safety::ScopeLocal => "scope-local",
90            Safety::SurfaceChanging => "surface-changing",
91            Safety::CapabilityChanging => "capability-changing",
92            Safety::NeedsHuman => "needs-human",
93        }
94    }
95
96    /// The applicability tier this safety level maps to. `format-only` and
97    /// `behavior-preserving` are machine-applicable; everything riskier is a
98    /// suggestion.
99    pub fn applicability(self) -> Applicability {
100        if self <= Safety::BehaviorPreserving {
101            Applicability::MachineApplicable
102        } else {
103            Applicability::Suggestion
104        }
105    }
106
107    /// True when the runner may auto-apply this rule's fix without an
108    /// explicit opt-in.
109    pub fn is_auto_applicable(self) -> bool {
110        self.applicability() == Applicability::MachineApplicable
111    }
112}
113
/// What flavor of work a rule performs, derived from its shape rather than
/// declared: a rule with a `fix` is a codemod; one with a `message` but no
/// `fix` is a lint; a bare matcher is a search.
///
/// Computed by [`Rule::kind`]. The presence of `fix` is checked first, so a
/// rule with a `fix` is a codemod whether or not it also has a `message`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
    /// Find-only: report matches, no diagnostic text, no rewrite.
    Search,
    /// Report a diagnostic (`message` + `severity`), no rewrite.
    Lint,
    /// Rewrite matches via `fix`.
    Codemod,
}
126
/// One node of the recursive matching algebra.
///
/// A `RuleNode` combines an optional **atomic** leaf (`pattern` / `kind` /
/// `regex`), **relational** constraints (`inside` / `has` / `follows` /
/// `precedes`, each a sub-node tuned by `stop_by` / `field`), and
/// **composite** combinators (`all` / `any` / `not` / `matches`). Every key
/// set on a node is ANDed: the node matches a tree-sitter node iff its
/// atomic part matches *and* every relational and composite part holds.
///
/// At most one of `pattern` / `kind` / `regex` may be set on a node;
/// [`RuleNode::atomic`] resolves the leaf and reports an error when more
/// than one is present. The struct itself does not enforce this at
/// deserialization time.
///
/// NOTE(review): unlike [`Rule`], this struct does not set
/// `deny_unknown_fields`, so a misspelled key inside a `[rule]` table is
/// presumably ignored rather than rejected — confirm that is intended.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct RuleNode {
    /// A code snippet in the target grammar with `$VAR` metavariable holes.
    pub pattern: Option<String>,
    /// A bare tree-sitter node kind (e.g. `"call_expression"`).
    pub kind: Option<String>,
    /// A regular expression matched against node text.
    pub regex: Option<String>,

    /// The node must be **inside** a node matching this sub-rule (ancestor).
    pub inside: Option<Box<RuleNode>>,
    /// The node must **have** a descendant matching this sub-rule.
    pub has: Option<Box<RuleNode>>,
    /// The node must **follow** a node matching this sub-rule (earlier).
    pub follows: Option<Box<RuleNode>>,
    /// The node must **precede** a node matching this sub-rule (later).
    pub precedes: Option<Box<RuleNode>>,

    /// Relational reach (used when this node is an `inside`/`has`/… target):
    /// `neighbor` (direct only, default), `end` (transitive), or a rule that
    /// halts the walk. (TOML `stopBy` or `stop_by`.)
    #[serde(default, alias = "stopBy")]
    pub stop_by: Option<StopBy>,
    /// Restrict an `inside`/`has` relation to a specific tree-sitter field.
    pub field: Option<String>,

    /// Every sub-rule must match the node.
    pub all: Option<Vec<RuleNode>>,
    /// At least one sub-rule must match the node.
    pub any: Option<Vec<RuleNode>>,
    /// The sub-rule must NOT match the node.
    pub not: Option<Box<RuleNode>>,
    /// Reference a utility rule by id (resolved from `[utils]`).
    pub matches: Option<String>,
}
172
/// How far a relational op (`inside` / `has` / `follows` / `precedes`)
/// walks the tree looking for a match.
///
/// The enum is `untagged`, so the variant is chosen by the shape of the
/// value: a keyword string (`"neighbor"` / `"end"`) deserializes as
/// [`StopBy::Keyword`], and a table in [`RuleNode`] form deserializes as
/// [`StopBy::Rule`].
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum StopBy {
    /// `"neighbor"` (direct parent/child/sibling only) or `"end"`
    /// (transitive — walk to the tree boundary).
    Keyword(StopKeyword),
    /// Walk until a node matching this rule is reached, then stop.
    Rule(Box<RuleNode>),
}
184
/// The keyword forms of [`StopBy`].
///
/// Deserialized from the lowercase variant name: `"neighbor"` or `"end"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StopKeyword {
    /// Only the immediate neighbor (default).
    Neighbor,
    /// Transitive — walk all the way to the tree boundary.
    End,
}
194
/// The resolved, exactly-one atomic matcher.
///
/// Produced by [`RuleNode::atomic`]; each variant owns a copy of the
/// corresponding `RuleNode` field's text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomicMatcher {
    /// A snippet pattern with metavariable holes.
    Pattern(String),
    /// A tree-sitter node kind.
    Kind(String),
    /// A regex over node text.
    Regex(String),
}
205
206impl RuleNode {
207    /// Resolve this node's atomic leaf. `Ok(None)` when the node is purely
208    /// relational/composite; `Err` when more than one atomic key is set.
209    pub fn atomic(&self) -> Result<Option<AtomicMatcher>, String> {
210        let set: Vec<&str> = [
211            self.pattern.as_ref().map(|_| "pattern"),
212            self.kind.as_ref().map(|_| "kind"),
213            self.regex.as_ref().map(|_| "regex"),
214        ]
215        .into_iter()
216        .flatten()
217        .collect();
218        match set.as_slice() {
219            [] => Ok(None),
220            [one] => Ok(Some(match *one {
221                "pattern" => AtomicMatcher::Pattern(self.pattern.clone().unwrap()),
222                "kind" => AtomicMatcher::Kind(self.kind.clone().unwrap()),
223                _ => AtomicMatcher::Regex(self.regex.clone().unwrap()),
224            })),
225            many => Err(format!(
226                "rule node sets multiple atomic matchers ({}); set at most one",
227                many.join(", ")
228            )),
229        }
230    }
231
232    /// True when `regex` is the only key set — a top-level grep-style rule
233    /// that scans source text rather than the tree.
234    pub fn is_pure_regex(&self) -> bool {
235        self.regex.is_some()
236            && self.pattern.is_none()
237            && self.kind.is_none()
238            && self.inside.is_none()
239            && self.has.is_none()
240            && self.follows.is_none()
241            && self.precedes.is_none()
242            && self.all.is_none()
243            && self.any.is_none()
244            && self.not.is_none()
245            && self.matches.is_none()
246    }
247
248    /// True when the node sets no matching keys at all (an empty node, which
249    /// is a rule authoring error).
250    pub fn is_empty(&self) -> bool {
251        self.pattern.is_none()
252            && self.kind.is_none()
253            && self.regex.is_none()
254            && self.inside.is_none()
255            && self.has.is_none()
256            && self.follows.is_none()
257            && self.precedes.is_none()
258            && self.all.is_none()
259            && self.any.is_none()
260            && self.not.is_none()
261            && self.matches.is_none()
262    }
263}
264
/// A single declarative rule.
///
/// Deserialized from one TOML document (see [`Rule::from_toml_str`]).
/// `deny_unknown_fields` makes an unrecognized top-level key a parse error.
/// Only `id`, `language`, and `rule` are required; every other field falls
/// back to its `Default` when omitted.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Rule {
    /// Stable identifier (also the diagnostic code).
    pub id: String,
    /// Target language name (resolved via `harn_hostlib::ast::Language`).
    pub language: String,
    /// Diagnostic severity. Defaults to `warning`.
    #[serde(default)]
    pub severity: Severity,
    /// Human-readable diagnostic message. Empty for search-only rules.
    #[serde(default)]
    pub message: String,
    /// How risky the `fix` is. Gates auto-apply. Defaults to `scope-local`.
    #[serde(default)]
    pub safety: Safety,
    /// The matcher block (atomic / relational / composite algebra).
    pub rule: RuleNode,
    /// Local utility rules referenced by `matches`, keyed by id.
    /// (TOML `[utils.NAME]`.)
    #[serde(default)]
    pub utils: BTreeMap<String, RuleNode>,
    /// Predicates on captured metavars; a match survives only when every
    /// constraint holds. (TOML `[[where]]`.)
    #[serde(default, rename = "where")]
    pub where_constraints: Vec<Constraint>,
    /// Derived metavars synthesized from captured ones before `fix`
    /// interpolation, keyed by the new metavar name. (TOML `[transform.X]`.)
    #[serde(default)]
    pub transform: BTreeMap<String, Transform>,
    /// Replacement template. Its presence makes the rule a codemod
    /// (see [`Rule::kind`]).
    #[serde(default)]
    pub fix: Option<String>,
}
300
/// A `where` predicate on a captured metavar. Exactly one of `regex` /
/// `comparison` / `pattern` is set.
///
/// All three predicate fields are optional at the type level, so the
/// "exactly one" rule is not enforced by deserialization — presumably it is
/// validated by whatever consumes the constraint (not visible in this
/// module). Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Constraint {
    /// The metavar this constraint applies to (without the leading `$`).
    pub metavar: String,
    /// The metavar's text must match this regex.
    #[serde(default)]
    pub regex: Option<String>,
    /// The metavar's text, parsed as a number, must satisfy this
    /// comparison (Semgrep `metavariable-comparison`).
    #[serde(default)]
    pub comparison: Option<Comparison>,
    /// A sub-pattern (Semgrep `metavariable-pattern`) run against the
    /// metavar's captured text; the constraint holds when it matches.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Optional language override for `pattern` — lets a captured string
    /// literal be matched in a different grammar than the host file.
    #[serde(default)]
    pub language: Option<String>,
}
324
/// A numeric/string comparison for a [`Constraint`].
///
/// `op` is stored as a free-form string and `value` as a raw TOML value, so
/// neither is validated when the rule is parsed. Unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Comparison {
    /// One of `<` `<=` `>` `>=` `==` `!=`.
    pub op: String,
    /// The right-hand side. Numbers compare numerically; strings/bools
    /// compare with `==` / `!=` only.
    pub value: toml::Value,
}
335
/// A metavar transform: read `source`, apply exactly one operation, bind
/// the result under a new metavar name (the map key).
///
/// The three operation fields are each optional at the type level, so the
/// "exactly one" rule is not enforced by deserialization — presumably it is
/// validated by the consumer (not visible in this module). Unknown keys are
/// rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Transform {
    /// The source metavar name (without `$`) whose text is transformed.
    pub source: String,
    /// Regex find/replace.
    #[serde(default)]
    pub replace: Option<ReplaceOp>,
    /// A character-index slice.
    #[serde(default)]
    pub substring: Option<SubstringOp>,
    /// A case conversion.
    #[serde(default)]
    pub convert: Option<ConvertOp>,
}
353
/// Regex find/replace transform op.
///
/// Both keys are required; unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ReplaceOp {
    /// The regex to find.
    pub regex: String,
    /// The replacement (supports `$1` capture refs).
    pub by: String,
}
363
/// Character-slice transform op. Indices are 0-based char offsets.
///
/// Both bounds are optional: per the field docs, an omitted `start` means 0
/// and an omitted `end` means the end of the string. A negative bound is
/// described as clamping to the string end.
/// NOTE(review): the slicing itself is not implemented in this module —
/// confirm the negative-bound behavior against the transform code.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SubstringOp {
    /// Inclusive start char index (default 0).
    #[serde(default)]
    pub start: Option<i64>,
    /// Exclusive end char index (default: end of string).
    #[serde(default)]
    pub end: Option<i64>,
}
376
/// Case-conversion transform op.
///
/// Deserialized from the snake_case variant name: `lower_camel`,
/// `upper_camel`, `snake`, `screaming_snake`, `kebab`, `lower`, `upper`.
/// Each variant's doc shows the output style it names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConvertOp {
    /// `lowerCamelCase`.
    LowerCamel,
    /// `UpperCamelCase`.
    UpperCamel,
    /// `snake_case`.
    Snake,
    /// `SCREAMING_SNAKE_CASE`.
    ScreamingSnake,
    /// `kebab-case`.
    Kebab,
    /// `lowercase`.
    Lower,
    /// `UPPERCASE`.
    Upper,
}
396
397impl Rule {
398    /// Derive the rule's kind from its shape (see [`RuleKind`]).
399    pub fn kind(&self) -> RuleKind {
400        if self.fix.is_some() {
401            RuleKind::Codemod
402        } else if self.message.is_empty() {
403            RuleKind::Search
404        } else {
405            RuleKind::Lint
406        }
407    }
408
409    /// Parse a single rule from a TOML document.
410    pub fn from_toml_str(text: &str) -> Result<Self, Box<toml::de::Error>> {
411        toml::from_str(text).map_err(Box::new)
412    }
413}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// A full codemod rule round-trips its identity fields and atomic pattern.
    #[test]
    fn parses_a_codemod_rule() {
        let source = r#"
            id = "destructure-default"
            language = "typescript"
            severity = "warning"
            message = "Collapse optional-chain default into a destructuring bind"
            fix = "{ $KEY: $SRC }"

            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#;
        let parsed = Rule::from_toml_str(source).expect("rule parses");

        assert_eq!(parsed.id, "destructure-default");
        assert_eq!(parsed.language, "typescript");
        assert_eq!(parsed.severity, Severity::Warning);
        assert_eq!(parsed.kind(), RuleKind::Codemod);

        let leaf = parsed.rule.atomic().unwrap();
        assert_eq!(
            leaf,
            Some(AtomicMatcher::Pattern("$SRC?.$KEY ?? $DEFAULT".into()))
        );
    }

    /// Omitting `severity` yields the default, and a bare matcher is a search.
    #[test]
    fn severity_defaults_to_warning() {
        let source = r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "macro_invocation"
            "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        assert_eq!(parsed.severity, Severity::Warning);
        // With neither a message nor a fix the rule is search-only.
        assert_eq!(parsed.kind(), RuleKind::Search);
    }

    /// A message without a fix classifies the rule as a lint.
    #[test]
    fn lint_rule_has_message_no_fix() {
        let source = r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO"
            "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        assert_eq!(parsed.kind(), RuleKind::Lint);
        let leaf = parsed.rule.atomic().unwrap();
        assert_eq!(leaf, Some(AtomicMatcher::Regex("TODO".into())));
    }

    /// Two atomic keys on one node parse, but fail to resolve.
    #[test]
    fn rejects_multiple_matchers() {
        let source = r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "foo"
            regex = "bar"
            "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        let resolved = parsed.rule.atomic();
        assert!(resolved.is_err());
    }

    /// An empty `[rule]` table parses and is reported as empty.
    #[test]
    fn empty_matcher_is_detectable() {
        let source = r#"
            id = "x"
            language = "rust"
            [rule]
            "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        // No atomic key resolves to Ok(None), and the node is flagged empty.
        let leaf = parsed.rule.atomic().unwrap();
        assert_eq!(leaf, None);
        assert!(parsed.rule.is_empty());
    }

    /// Nested relational/composite tables (and the `stopBy` alias) parse.
    #[test]
    fn parses_relational_and_composite_keys() {
        let source = r#"
            id = "nested"
            language = "typescript"
            [rule]
            pattern = "let $NAME = $INIT"
            [rule.inside]
            kind = "statement_block"
            stopBy = "end"
            [rule.not.inside]
            kind = "try_statement"
            stopBy = "end"
            "#;
        let parsed = Rule::from_toml_str(source).expect("parses");

        let matcher = &parsed.rule;
        assert!(matcher.inside.is_some());
        assert!(matcher.not.is_some());
        let negated = matcher.not.as_ref().unwrap();
        assert!(negated.inside.is_some());
    }

    /// An unrecognized top-level key is a parse error.
    #[test]
    fn rejects_unknown_top_level_field() {
        let source = r#"
            id = "x"
            language = "rust"
            bogus = true
            [rule]
            kind = "foo"
            "#;
        let outcome = Rule::from_toml_str(source);

        assert!(outcome.is_err());
    }
}