Skip to main content

harn_rules/
model.rs

1//! The declarative rule data model.
2//!
3//! A rule is the atomic unit the engine consumes: an identity (`id`,
4//! `language`, `severity`, `message`), a `rule` block describing *what to
5//! match* (the atomic tier: `pattern` snippet, `kind`, or `regex`), and an
6//! optional `fix` describing *how to rewrite* it. Relational/composite
7//! matching (#2833) and `where`/`transform` (#2834) extend this model;
8//! this module is the atomic-tier surface they build on.
9
10use std::collections::BTreeMap;
11
12use serde::Deserialize;
13
/// Diagnostic severity. Mirrors the `harn-lint` vocabulary so findings can
/// flow into the same reporting surface.
///
/// Deserialized from the lowercase variant name (`info`, `warning`,
/// `error`). [`Severity::Warning`] is the `Default`, so a rule that omits
/// `severity` is reported as a warning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Informational; no action required.
    Info,
    /// Default — something worth a human's attention.
    #[default]
    Warning,
    /// A problem that should block.
    Error,
}
27
/// How risky a rule's `fix` is, mapped onto Burin's edit-safety taxonomy.
/// Ordered least → most dangerous; the codemod runner auto-applies only the
/// two safest tiers (see [`Safety::applicability`]).
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so the order of
/// the variants below is significant: reordering them changes how safety
/// levels compare. Deserialized from the kebab-case variant name
/// (`format-only`, `behavior-preserving`, `scope-local`, …).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Safety {
    /// Whitespace / formatting only.
    FormatOnly,
    /// Semantics-preserving rewrite.
    BehaviorPreserving,
    /// Changes behavior, but only within the matched scope. **Default** —
    /// conservative, so an undeclared codemod does not silently auto-apply.
    #[default]
    ScopeLocal,
    /// Changes an externally-visible surface (a signature, an export).
    SurfaceChanging,
    /// Changes capabilities / effects (I/O, permissions).
    CapabilityChanging,
    /// Always requires a human in the loop.
    NeedsHuman,
}
49
/// Whether a fix may be auto-applied (clippy/ESLint `machine-applicable`)
/// or is opt-in only (`suggestion`).
///
/// This is a derived classification: it is computed from a [`Safety`] level
/// by [`Safety::applicability`] and is not itself deserialized from a rule
/// file (the type does not derive `Deserialize`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Applicability {
    /// Safe to auto-apply (`format-only` / `behavior-preserving`).
    MachineApplicable,
    /// Preview / opt-in only.
    Suggestion,
}
59
60impl Safety {
61    /// The applicability tier this safety level maps to. `format-only` and
62    /// `behavior-preserving` are machine-applicable; everything riskier is a
63    /// suggestion.
64    pub fn applicability(self) -> Applicability {
65        if self <= Safety::BehaviorPreserving {
66            Applicability::MachineApplicable
67        } else {
68            Applicability::Suggestion
69        }
70    }
71
72    /// True when the runner may auto-apply this rule's fix without an
73    /// explicit opt-in.
74    pub fn is_auto_applicable(self) -> bool {
75        self.applicability() == Applicability::MachineApplicable
76    }
77}
78
/// What flavor of work a rule performs, derived from its shape rather than
/// declared: a rule with a `fix` is a codemod; one with a `message` but no
/// `fix` is a lint; a bare matcher is a search.
///
/// Computed by [`Rule::kind`]; it is never read from the rule file (the
/// type does not derive `Deserialize`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
    /// Find-only: report matches, no diagnostic text, no rewrite.
    Search,
    /// Report a diagnostic (`message` + `severity`), no rewrite.
    Lint,
    /// Rewrite matches via `fix`.
    Codemod,
}
91
/// One node of the matcher tree. At most one of `pattern` / `kind` /
/// `regex` may be set on a node; [`RuleNode::atomic`] resolves that atomic
/// leaf and reports an error when more than one is present.
///
/// A `RuleNode` is the recursive matching algebra: an optional **atomic**
/// leaf (`pattern` / `kind` / `regex`), **relational** constraints
/// (`inside` / `has` / `follows` / `precedes`, each a sub-node tuned by
/// `stop_by` / `field`), and **composite** combinators (`all` / `any` /
/// `not` / `matches`). Every key set on a node is ANDed: the node matches a
/// tree-sitter node iff its atomic part matches *and* every relational and
/// composite part holds.
///
/// Every field is optional, so an empty table deserializes to an all-`None`
/// node; [`RuleNode::is_empty`] detects that case.
///
/// NOTE(review): unlike [`Rule`], this struct does not opt into
/// `deny_unknown_fields`, so a misspelled key is presumably ignored rather
/// than rejected — confirm this is intended.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct RuleNode {
    /// A code snippet in the target grammar with `$VAR` metavariable holes.
    pub pattern: Option<String>,
    /// A bare tree-sitter node kind (e.g. `"call_expression"`).
    pub kind: Option<String>,
    /// A regular expression matched against node text.
    pub regex: Option<String>,

    /// The node must be **inside** a node matching this sub-rule (ancestor).
    pub inside: Option<Box<RuleNode>>,
    /// The node must **have** a descendant matching this sub-rule.
    pub has: Option<Box<RuleNode>>,
    /// The node must **follow** a node matching this sub-rule (earlier).
    pub follows: Option<Box<RuleNode>>,
    /// The node must **precede** a node matching this sub-rule (later).
    pub precedes: Option<Box<RuleNode>>,

    /// Relational reach (used when this node is an `inside`/`has`/… target):
    /// `neighbor` (direct only, default), `end` (transitive), or a rule that
    /// halts the walk. (TOML `stopBy` or `stop_by`.)
    #[serde(default, alias = "stopBy")]
    pub stop_by: Option<StopBy>,
    /// Restrict an `inside`/`has` relation to a specific tree-sitter field.
    pub field: Option<String>,

    /// Every sub-rule must match the node.
    pub all: Option<Vec<RuleNode>>,
    /// At least one sub-rule must match the node.
    pub any: Option<Vec<RuleNode>>,
    /// The sub-rule must NOT match the node.
    pub not: Option<Box<RuleNode>>,
    /// Reference a utility rule by id (resolved from `[utils]`).
    pub matches: Option<String>,
}
137
/// How far a relational op (`inside` / `has` / `follows` / `precedes`)
/// walks the tree looking for a match.
///
/// The enum is `untagged`, so the value's shape selects the variant and
/// serde tries the variants in declaration order: the keyword form first,
/// then a nested rule. Keep `Keyword` ahead of `Rule`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum StopBy {
    /// `"neighbor"` (direct parent/child/sibling only) or `"end"`
    /// (transitive — walk to the tree boundary).
    Keyword(StopKeyword),
    /// Walk until a node matching this rule is reached, then stop.
    Rule(Box<RuleNode>),
}
149
/// The keyword forms of [`StopBy`].
///
/// Deserialized from the lowercase variant name (`neighbor`, `end`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StopKeyword {
    /// Only the immediate neighbor (default).
    Neighbor,
    /// Transitive — walk all the way to the tree boundary.
    End,
}
159
/// The resolved, exactly-one atomic matcher.
///
/// Produced by [`RuleNode::atomic`] from whichever of `pattern` / `kind` /
/// `regex` is set; each variant owns a copy of the corresponding string.
/// Not deserialized directly (the type does not derive `Deserialize`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomicMatcher {
    /// A snippet pattern with metavariable holes.
    Pattern(String),
    /// A tree-sitter node kind.
    Kind(String),
    /// A regex over node text.
    Regex(String),
}
170
171impl RuleNode {
172    /// Resolve this node's atomic leaf. `Ok(None)` when the node is purely
173    /// relational/composite; `Err` when more than one atomic key is set.
174    pub fn atomic(&self) -> Result<Option<AtomicMatcher>, String> {
175        let set: Vec<&str> = [
176            self.pattern.as_ref().map(|_| "pattern"),
177            self.kind.as_ref().map(|_| "kind"),
178            self.regex.as_ref().map(|_| "regex"),
179        ]
180        .into_iter()
181        .flatten()
182        .collect();
183        match set.as_slice() {
184            [] => Ok(None),
185            [one] => Ok(Some(match *one {
186                "pattern" => AtomicMatcher::Pattern(self.pattern.clone().unwrap()),
187                "kind" => AtomicMatcher::Kind(self.kind.clone().unwrap()),
188                _ => AtomicMatcher::Regex(self.regex.clone().unwrap()),
189            })),
190            many => Err(format!(
191                "rule node sets multiple atomic matchers ({}); set at most one",
192                many.join(", ")
193            )),
194        }
195    }
196
197    /// True when `regex` is the only key set — a top-level grep-style rule
198    /// that scans source text rather than the tree.
199    pub fn is_pure_regex(&self) -> bool {
200        self.regex.is_some()
201            && self.pattern.is_none()
202            && self.kind.is_none()
203            && self.inside.is_none()
204            && self.has.is_none()
205            && self.follows.is_none()
206            && self.precedes.is_none()
207            && self.all.is_none()
208            && self.any.is_none()
209            && self.not.is_none()
210            && self.matches.is_none()
211    }
212
213    /// True when the node sets no matching keys at all (an empty node, which
214    /// is a rule authoring error).
215    pub fn is_empty(&self) -> bool {
216        self.pattern.is_none()
217            && self.kind.is_none()
218            && self.regex.is_none()
219            && self.inside.is_none()
220            && self.has.is_none()
221            && self.follows.is_none()
222            && self.precedes.is_none()
223            && self.all.is_none()
224            && self.any.is_none()
225            && self.not.is_none()
226            && self.matches.is_none()
227    }
228}
229
/// A single declarative rule.
///
/// `id`, `language` and `rule` are required; every other field falls back
/// to its `Default` when omitted. Unknown top-level keys are rejected
/// (`deny_unknown_fields`), so a misspelled field fails to parse instead of
/// being silently dropped.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Rule {
    /// Stable identifier (also the diagnostic code).
    pub id: String,
    /// Target language name (resolved via `harn_hostlib::ast::Language`).
    pub language: String,
    /// Diagnostic severity. Defaults to `warning`.
    #[serde(default)]
    pub severity: Severity,
    /// Human-readable diagnostic message. Empty for search-only rules.
    #[serde(default)]
    pub message: String,
    /// How risky the `fix` is. Gates auto-apply. Defaults to `scope-local`.
    #[serde(default)]
    pub safety: Safety,
    /// The matcher block (atomic / relational / composite algebra).
    pub rule: RuleNode,
    /// Local utility rules referenced by `matches`, keyed by id.
    /// (TOML `[utils.NAME]`.)
    #[serde(default)]
    pub utils: BTreeMap<String, RuleNode>,
    /// Predicates on captured metavars; a match survives only when every
    /// constraint holds. (TOML `[[where]]`.)
    #[serde(default, rename = "where")]
    pub where_constraints: Vec<Constraint>,
    /// Derived metavars synthesized from captured ones before `fix`
    /// interpolation, keyed by the new metavar name. (TOML `[transform.X]`.)
    #[serde(default)]
    pub transform: BTreeMap<String, Transform>,
    /// Replacement template. Its presence makes the rule a codemod.
    #[serde(default)]
    pub fix: Option<String>,
}
265
/// A `where` predicate on a captured metavar. Exactly one of `regex` /
/// `comparison` / `pattern` is set.
///
/// NOTE(review): the type itself does not enforce "exactly one" — all three
/// are independent `Option`s — so that check presumably lives in whatever
/// validates or evaluates the constraint; confirm. Unknown keys are
/// rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Constraint {
    /// The metavar this constraint applies to (without the leading `$`).
    pub metavar: String,
    /// The metavar's text must match this regex.
    #[serde(default)]
    pub regex: Option<String>,
    /// The metavar's text, parsed as a number, must satisfy this
    /// comparison (Semgrep `metavariable-comparison`).
    #[serde(default)]
    pub comparison: Option<Comparison>,
    /// A sub-pattern (Semgrep `metavariable-pattern`) run against the
    /// metavar's captured text; the constraint holds when it matches.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Optional language override for `pattern` — lets a captured string
    /// literal be matched in a different grammar than the host file.
    #[serde(default)]
    pub language: Option<String>,
}
289
/// A numeric/string comparison for a [`Constraint`].
///
/// Both fields are required and unknown keys are rejected
/// (`deny_unknown_fields`). The operator is stored as a raw string here;
/// NOTE(review): it is presumably validated where the comparison is
/// evaluated — confirm.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Comparison {
    /// One of `<` `<=` `>` `>=` `==` `!=`.
    pub op: String,
    /// The right-hand side. Numbers compare numerically; strings/bools
    /// compare with `==` / `!=` only.
    pub value: toml::Value,
}
300
/// A metavar transform: read `source`, apply exactly one operation, bind
/// the result under a new metavar name (the map key).
///
/// NOTE(review): the three operations are independent `Option`s, so the
/// "exactly one" rule is not enforced by this type — presumably it is
/// checked where the transform is applied; confirm. Unknown keys are
/// rejected at parse time (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Transform {
    /// The source metavar name (without `$`) whose text is transformed.
    pub source: String,
    /// Regex find/replace.
    #[serde(default)]
    pub replace: Option<ReplaceOp>,
    /// A character-index slice.
    #[serde(default)]
    pub substring: Option<SubstringOp>,
    /// A case conversion.
    #[serde(default)]
    pub convert: Option<ConvertOp>,
}
318
/// Regex find/replace transform op.
///
/// Both `regex` and `by` are required; unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ReplaceOp {
    /// The regex to find.
    pub regex: String,
    /// The replacement (supports `$1` capture refs).
    pub by: String,
}
328
/// Character-slice transform op. Indices are 0-based char offsets; a
/// negative or omitted bound clamps to the string end.
///
/// Both bounds are optional signed integers, so an empty table is a valid
/// `SubstringOp`; unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SubstringOp {
    /// Inclusive start char index (default 0).
    #[serde(default)]
    pub start: Option<i64>,
    /// Exclusive end char index (default: end of string).
    #[serde(default)]
    pub end: Option<i64>,
}
341
/// Case-conversion transform op.
///
/// Deserialized from the snake_case variant name: `lower_camel`,
/// `upper_camel`, `snake`, `screaming_snake`, `kebab`, `lower`, `upper`.
/// Each variant's doc shows the output style it names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConvertOp {
    /// `lowerCamelCase`.
    LowerCamel,
    /// `UpperCamelCase`.
    UpperCamel,
    /// `snake_case`.
    Snake,
    /// `SCREAMING_SNAKE_CASE`.
    ScreamingSnake,
    /// `kebab-case`.
    Kebab,
    /// `lowercase`.
    Lower,
    /// `UPPERCASE`.
    Upper,
}
361
362impl Rule {
363    /// Derive the rule's kind from its shape (see [`RuleKind`]).
364    pub fn kind(&self) -> RuleKind {
365        if self.fix.is_some() {
366            RuleKind::Codemod
367        } else if self.message.is_empty() {
368            RuleKind::Search
369        } else {
370            RuleKind::Lint
371        }
372    }
373
374    /// Parse a single rule from a TOML document.
375    pub fn from_toml_str(text: &str) -> Result<Self, Box<toml::de::Error>> {
376        toml::from_str(text).map_err(Box::new)
377    }
378}
379
// Unit tests for the rule data model: TOML parsing, defaults, kind
// derivation, and the `RuleNode` helper predicates.
#[cfg(test)]
mod tests {
    use super::*;

    // A rule with `fix` set is classified as a codemod, and its `pattern`
    // resolves as the atomic matcher.
    #[test]
    fn parses_a_codemod_rule() {
        let rule = Rule::from_toml_str(
            r#"
            id = "destructure-default"
            language = "typescript"
            severity = "warning"
            message = "Collapse optional-chain default into a destructuring bind"
            fix = "{ $KEY: $SRC }"

            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#,
        )
        .expect("rule parses");
        assert_eq!(rule.id, "destructure-default");
        assert_eq!(rule.language, "typescript");
        assert_eq!(rule.severity, Severity::Warning);
        assert_eq!(rule.kind(), RuleKind::Codemod);
        assert_eq!(
            rule.rule.atomic().unwrap(),
            Some(AtomicMatcher::Pattern("$SRC?.$KEY ?? $DEFAULT".into()))
        );
    }

    // Omitting `severity` falls back to `Severity::Warning`; with neither
    // `message` nor `fix` the rule is a search.
    #[test]
    fn severity_defaults_to_warning() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "macro_invocation"
            "#,
        )
        .unwrap();
        assert_eq!(rule.severity, Severity::Warning);
        // No message, no fix -> a search rule.
        assert_eq!(rule.kind(), RuleKind::Search);
    }

    // A `message` without a `fix` makes the rule a lint.
    #[test]
    fn lint_rule_has_message_no_fix() {
        let rule = Rule::from_toml_str(
            r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO"
            "#,
        )
        .unwrap();
        assert_eq!(rule.kind(), RuleKind::Lint);
        assert_eq!(
            rule.rule.atomic().unwrap(),
            Some(AtomicMatcher::Regex("TODO".into()))
        );
    }

    // Two atomic keys on one node still parse, but `atomic()` rejects them.
    #[test]
    fn rejects_multiple_matchers() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "foo"
            regex = "bar"
            "#,
        )
        .unwrap();
        assert!(rule.rule.atomic().is_err());
    }

    // An empty `[rule]` table parses; emptiness is detected afterwards via
    // `is_empty()` rather than at parse time.
    #[test]
    fn empty_matcher_is_detectable() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            "#,
        )
        .unwrap();
        // An empty node sets no atomic key (Ok(None)) and is flagged empty.
        assert_eq!(rule.rule.atomic().unwrap(), None);
        assert!(rule.rule.is_empty());
    }

    // Nested relational (`inside`) and composite (`not`) tables parse,
    // including the camelCase `stopBy` alias.
    #[test]
    fn parses_relational_and_composite_keys() {
        let rule = Rule::from_toml_str(
            r#"
            id = "nested"
            language = "typescript"
            [rule]
            pattern = "let $NAME = $INIT"
            [rule.inside]
            kind = "statement_block"
            stopBy = "end"
            [rule.not.inside]
            kind = "try_statement"
            stopBy = "end"
            "#,
        )
        .expect("parses");
        assert!(rule.rule.inside.is_some());
        assert!(rule.rule.not.is_some());
        assert!(rule.rule.not.as_ref().unwrap().inside.is_some());
    }

    // `Rule` denies unknown fields, so a stray top-level key is a parse
    // error.
    #[test]
    fn rejects_unknown_top_level_field() {
        let err = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            bogus = true
            [rule]
            kind = "foo"
            "#,
        );
        assert!(err.is_err());
    }
}