harn_rules/model.rs
1//! The declarative rule data model.
2//!
//! A rule is the atomic unit the engine consumes: an identity (`id`,
//! `language`, `severity`, `message`), a `rule` block describing *what to
//! match*, and an optional `fix` describing *how to rewrite* it. The
//! matcher starts at the atomic tier (`pattern` snippet, `kind`, or
//! `regex`); relational/composite matching (#2833) and `where`/`transform`
//! (#2834) extend that tier and are modelled in this module as well.
9
10use std::collections::BTreeMap;
11
12use serde::Deserialize;
13
14/// Diagnostic severity. Mirrors the `harn-lint` vocabulary so findings can
15/// flow into the same reporting surface.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
17#[serde(rename_all = "lowercase")]
18pub enum Severity {
19 /// Informational; no action required.
20 Info,
21 /// Default — something worth a human's attention.
22 #[default]
23 Warning,
24 /// A problem that should block.
25 Error,
26}
27
28impl Severity {
29 /// The stable lowercase name (the inverse of the `Deserialize` rename),
30 /// used for diagnostics and JSON surfaces.
31 pub fn as_str(self) -> &'static str {
32 match self {
33 Severity::Info => "info",
34 Severity::Warning => "warning",
35 Severity::Error => "error",
36 }
37 }
38}
39
40/// How risky a rule's `fix` is, mapped onto Burin's edit-safety taxonomy.
41/// Ordered least → most dangerous; the codemod runner auto-applies only the
42/// two safest tiers (see [`Safety::applicability`]).
43#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Deserialize)]
44#[serde(rename_all = "kebab-case")]
45pub enum Safety {
46 /// Whitespace / formatting only.
47 FormatOnly,
48 /// Semantics-preserving rewrite.
49 BehaviorPreserving,
50 /// Changes behavior, but only within the matched scope. **Default** —
51 /// conservative, so an undeclared codemod does not silently auto-apply.
52 #[default]
53 ScopeLocal,
54 /// Changes an externally-visible surface (a signature, an export).
55 SurfaceChanging,
56 /// Changes capabilities / effects (I/O, permissions).
57 CapabilityChanging,
58 /// Always requires a human in the loop.
59 NeedsHuman,
60}
61
/// Applicability tier of a fix: either it may be auto-applied
/// (clippy/ESLint `machine-applicable`) or it is opt-in only (`suggestion`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Applicability {
    /// May be auto-applied (`format-only` / `behavior-preserving`).
    MachineApplicable,
    /// Shown as a preview; applied only on opt-in.
    Suggestion,
}

impl Applicability {
    /// Returns the stable name of this tier for diagnostics and JSON
    /// surfaces.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::MachineApplicable => "machine-applicable",
            Self::Suggestion => "suggestion",
        }
    }
}
81
82impl Safety {
83 /// The stable kebab-case name (the inverse of the `Deserialize` rename),
84 /// used for diagnostics and JSON surfaces.
85 pub fn as_str(self) -> &'static str {
86 match self {
87 Safety::FormatOnly => "format-only",
88 Safety::BehaviorPreserving => "behavior-preserving",
89 Safety::ScopeLocal => "scope-local",
90 Safety::SurfaceChanging => "surface-changing",
91 Safety::CapabilityChanging => "capability-changing",
92 Safety::NeedsHuman => "needs-human",
93 }
94 }
95
96 /// The applicability tier this safety level maps to. `format-only` and
97 /// `behavior-preserving` are machine-applicable; everything riskier is a
98 /// suggestion.
99 pub fn applicability(self) -> Applicability {
100 if self <= Safety::BehaviorPreserving {
101 Applicability::MachineApplicable
102 } else {
103 Applicability::Suggestion
104 }
105 }
106
107 /// True when the runner may auto-apply this rule's fix without an
108 /// explicit opt-in.
109 pub fn is_auto_applicable(self) -> bool {
110 self.applicability() == Applicability::MachineApplicable
111 }
112}
113
/// The flavor of work a rule performs. It is never declared; it is derived
/// from the rule's shape: a rule with a `fix` is a codemod, a rule with a
/// `message` but no `fix` is a lint, and a bare matcher is a search.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RuleKind {
    /// Find-only: matches are reported without diagnostic text or rewrite.
    Search,
    /// Reports a diagnostic (`message` + `severity`) without rewriting.
    Lint,
    /// Rewrites matches via `fix`.
    Codemod,
}
126
127/// The atomic-tier matcher. Exactly one of `pattern` / `kind` / `regex`
128/// must be set on a node that carries one; [`RuleNode::atomic`] resolves it.
129///
130/// A `RuleNode` is the recursive matching algebra: an optional **atomic**
131/// leaf (`pattern` / `kind` / `regex`), **relational** constraints
132/// (`inside` / `has` / `follows` / `precedes`, each a sub-node tuned by
133/// `stop_by` / `field`), and **composite** combinators (`all` / `any` /
134/// `not` / `matches`). Every key set on a node is ANDed: the node matches a
135/// tree-sitter node iff its atomic part matches *and* every relational and
136/// composite part holds.
137#[derive(Debug, Clone, Default, Deserialize)]
138pub struct RuleNode {
139 /// A code snippet in the target grammar with `$VAR` metavariable holes.
140 pub pattern: Option<String>,
141 /// A bare tree-sitter node kind (e.g. `"call_expression"`).
142 pub kind: Option<String>,
143 /// A regular expression matched against node text.
144 pub regex: Option<String>,
145
146 /// The node must be **inside** a node matching this sub-rule (ancestor).
147 pub inside: Option<Box<RuleNode>>,
148 /// The node must **have** a descendant matching this sub-rule.
149 pub has: Option<Box<RuleNode>>,
150 /// The node must **follow** a node matching this sub-rule (earlier).
151 pub follows: Option<Box<RuleNode>>,
152 /// The node must **precede** a node matching this sub-rule (later).
153 pub precedes: Option<Box<RuleNode>>,
154
155 /// Relational reach (used when this node is an `inside`/`has`/… target):
156 /// `neighbor` (direct only, default), `end` (transitive), or a rule that
157 /// halts the walk. (TOML `stopBy` or `stop_by`.)
158 #[serde(default, alias = "stopBy")]
159 pub stop_by: Option<StopBy>,
160 /// Restrict an `inside`/`has` relation to a specific tree-sitter field.
161 pub field: Option<String>,
162
163 /// Every sub-rule must match the node.
164 pub all: Option<Vec<RuleNode>>,
165 /// At least one sub-rule must match the node.
166 pub any: Option<Vec<RuleNode>>,
167 /// The sub-rule must NOT match the node.
168 pub not: Option<Box<RuleNode>>,
169 /// Reference a utility rule by id (resolved from `[utils]`).
170 pub matches: Option<String>,
171}
172
173/// How far a relational op (`inside` / `has` / `follows` / `precedes`)
174/// walks the tree looking for a match.
175#[derive(Debug, Clone, Deserialize)]
176#[serde(untagged)]
177pub enum StopBy {
178 /// `"neighbor"` (direct parent/child/sibling only) or `"end"`
179 /// (transitive — walk to the tree boundary).
180 Keyword(StopKeyword),
181 /// Walk until a node matching this rule is reached, then stop.
182 Rule(Box<RuleNode>),
183}
184
185/// The keyword forms of [`StopBy`].
186#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
187#[serde(rename_all = "lowercase")]
188pub enum StopKeyword {
189 /// Only the immediate neighbor (default).
190 Neighbor,
191 /// Transitive — walk all the way to the tree boundary.
192 End,
193}
194
/// The atomic matcher of a node after resolution: exactly one of the three
/// atomic forms, carrying the text that was written in the rule.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AtomicMatcher {
    /// A snippet pattern containing metavariable holes.
    Pattern(String),
    /// The name of a tree-sitter node kind.
    Kind(String),
    /// A regular expression applied to node text.
    Regex(String),
}
205
206impl RuleNode {
207 /// Resolve this node's atomic leaf. `Ok(None)` when the node is purely
208 /// relational/composite; `Err` when more than one atomic key is set.
209 pub fn atomic(&self) -> Result<Option<AtomicMatcher>, String> {
210 let set: Vec<&str> = [
211 self.pattern.as_ref().map(|_| "pattern"),
212 self.kind.as_ref().map(|_| "kind"),
213 self.regex.as_ref().map(|_| "regex"),
214 ]
215 .into_iter()
216 .flatten()
217 .collect();
218 match set.as_slice() {
219 [] => Ok(None),
220 [one] => Ok(Some(match *one {
221 "pattern" => AtomicMatcher::Pattern(self.pattern.clone().unwrap()),
222 "kind" => AtomicMatcher::Kind(self.kind.clone().unwrap()),
223 _ => AtomicMatcher::Regex(self.regex.clone().unwrap()),
224 })),
225 many => Err(format!(
226 "rule node sets multiple atomic matchers ({}); set at most one",
227 many.join(", ")
228 )),
229 }
230 }
231
232 /// True when `regex` is the only key set — a top-level grep-style rule
233 /// that scans source text rather than the tree.
234 pub fn is_pure_regex(&self) -> bool {
235 self.regex.is_some()
236 && self.pattern.is_none()
237 && self.kind.is_none()
238 && self.inside.is_none()
239 && self.has.is_none()
240 && self.follows.is_none()
241 && self.precedes.is_none()
242 && self.all.is_none()
243 && self.any.is_none()
244 && self.not.is_none()
245 && self.matches.is_none()
246 }
247
248 /// True when the node sets no matching keys at all (an empty node, which
249 /// is a rule authoring error).
250 pub fn is_empty(&self) -> bool {
251 self.pattern.is_none()
252 && self.kind.is_none()
253 && self.regex.is_none()
254 && self.inside.is_none()
255 && self.has.is_none()
256 && self.follows.is_none()
257 && self.precedes.is_none()
258 && self.all.is_none()
259 && self.any.is_none()
260 && self.not.is_none()
261 && self.matches.is_none()
262 }
263}
264
265/// A single declarative rule.
266#[derive(Debug, Clone, Deserialize)]
267#[serde(deny_unknown_fields)]
268pub struct Rule {
269 /// Stable identifier (also the diagnostic code).
270 pub id: String,
271 /// Target language name (resolved via `harn_hostlib::ast::Language`).
272 pub language: String,
273 /// Diagnostic severity. Defaults to `warning`.
274 #[serde(default)]
275 pub severity: Severity,
276 /// Human-readable diagnostic message. Empty for search-only rules.
277 #[serde(default)]
278 pub message: String,
279 /// How risky the `fix` is. Gates auto-apply. Defaults to `scope-local`.
280 #[serde(default)]
281 pub safety: Safety,
282 /// The matcher block (atomic / relational / composite algebra).
283 pub rule: RuleNode,
284 /// Local utility rules referenced by `matches`, keyed by id.
285 /// (TOML `[utils.NAME]`.)
286 #[serde(default)]
287 pub utils: BTreeMap<String, RuleNode>,
288 /// Predicates on captured metavars; a match survives only when every
289 /// constraint holds. (TOML `[[where]]`.)
290 #[serde(default, rename = "where")]
291 pub where_constraints: Vec<Constraint>,
292 /// Derived metavars synthesized from captured ones before `fix`
293 /// interpolation, keyed by the new metavar name. (TOML `[transform.X]`.)
294 #[serde(default)]
295 pub transform: BTreeMap<String, Transform>,
296 /// Replacement template. Its presence makes the rule a codemod.
297 #[serde(default)]
298 pub fix: Option<String>,
299}
300
301/// A `where` predicate on a captured metavar. Exactly one of `regex` /
302/// `comparison` / `pattern` is set.
303#[derive(Debug, Clone, Deserialize)]
304#[serde(deny_unknown_fields)]
305pub struct Constraint {
306 /// The metavar this constraint applies to (without the leading `$`).
307 pub metavar: String,
308 /// The metavar's text must match this regex.
309 #[serde(default)]
310 pub regex: Option<String>,
311 /// The metavar's text, parsed as a number, must satisfy this
312 /// comparison (Semgrep `metavariable-comparison`).
313 #[serde(default)]
314 pub comparison: Option<Comparison>,
315 /// A sub-pattern (Semgrep `metavariable-pattern`) run against the
316 /// metavar's captured text; the constraint holds when it matches.
317 #[serde(default)]
318 pub pattern: Option<String>,
319 /// Optional language override for `pattern` — lets a captured string
320 /// literal be matched in a different grammar than the host file.
321 #[serde(default)]
322 pub language: Option<String>,
323}
324
325/// A numeric/string comparison for a [`Constraint`].
326#[derive(Debug, Clone, Deserialize)]
327#[serde(deny_unknown_fields)]
328pub struct Comparison {
329 /// One of `<` `<=` `>` `>=` `==` `!=`.
330 pub op: String,
331 /// The right-hand side. Numbers compare numerically; strings/bools
332 /// compare with `==` / `!=` only.
333 pub value: toml::Value,
334}
335
336/// A metavar transform: read `source`, apply exactly one operation, bind
337/// the result under a new metavar name (the map key).
338#[derive(Debug, Clone, Deserialize)]
339#[serde(deny_unknown_fields)]
340pub struct Transform {
341 /// The source metavar name (without `$`) whose text is transformed.
342 pub source: String,
343 /// Regex find/replace.
344 #[serde(default)]
345 pub replace: Option<ReplaceOp>,
346 /// A character-index slice.
347 #[serde(default)]
348 pub substring: Option<SubstringOp>,
349 /// A case conversion.
350 #[serde(default)]
351 pub convert: Option<ConvertOp>,
352}
353
354/// Regex find/replace transform op.
355#[derive(Debug, Clone, Deserialize)]
356#[serde(deny_unknown_fields)]
357pub struct ReplaceOp {
358 /// The regex to find.
359 pub regex: String,
360 /// The replacement (supports `$1` capture refs).
361 pub by: String,
362}
363
364/// Character-slice transform op. Indices are 0-based char offsets; a
365/// negative or omitted bound clamps to the string end.
366#[derive(Debug, Clone, Deserialize)]
367#[serde(deny_unknown_fields)]
368pub struct SubstringOp {
369 /// Inclusive start char index (default 0).
370 #[serde(default)]
371 pub start: Option<i64>,
372 /// Exclusive end char index (default: end of string).
373 #[serde(default)]
374 pub end: Option<i64>,
375}
376
377/// Case-conversion transform op.
378#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
379#[serde(rename_all = "snake_case")]
380pub enum ConvertOp {
381 /// `lowerCamelCase`.
382 LowerCamel,
383 /// `UpperCamelCase`.
384 UpperCamel,
385 /// `snake_case`.
386 Snake,
387 /// `SCREAMING_SNAKE_CASE`.
388 ScreamingSnake,
389 /// `kebab-case`.
390 Kebab,
391 /// `lowercase`.
392 Lower,
393 /// `UPPERCASE`.
394 Upper,
395}
396
397impl Rule {
398 /// Derive the rule's kind from its shape (see [`RuleKind`]).
399 pub fn kind(&self) -> RuleKind {
400 if self.fix.is_some() {
401 RuleKind::Codemod
402 } else if self.message.is_empty() {
403 RuleKind::Search
404 } else {
405 RuleKind::Lint
406 }
407 }
408
409 /// Parse a single rule from a TOML document.
410 pub fn from_toml_str(text: &str) -> Result<Self, Box<toml::de::Error>> {
411 toml::from_str(text).map_err(Box::new)
412 }
413}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// A rule with a `fix` parses, keeps its identity fields, and is
    /// classified as a codemod with a `pattern` matcher.
    #[test]
    fn parses_a_codemod_rule() {
        let source = r#"
            id = "destructure-default"
            language = "typescript"
            severity = "warning"
            message = "Collapse optional-chain default into a destructuring bind"
            fix = "{ $KEY: $SRC }"

            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
        "#;
        let parsed = Rule::from_toml_str(source).expect("rule parses");

        assert_eq!(parsed.id, "destructure-default");
        assert_eq!(parsed.language, "typescript");
        assert_eq!(parsed.severity, Severity::Warning);
        assert_eq!(parsed.kind(), RuleKind::Codemod);

        let matcher = parsed.rule.atomic().unwrap();
        assert_eq!(
            matcher,
            Some(AtomicMatcher::Pattern("$SRC?.$KEY ?? $DEFAULT".into()))
        );
    }

    /// Omitting `severity` yields `warning`; with no message and no fix the
    /// rule is a search.
    #[test]
    fn severity_defaults_to_warning() {
        let source = r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "macro_invocation"
        "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        assert_eq!(parsed.severity, Severity::Warning);
        // No message, no fix -> a search rule.
        assert_eq!(parsed.kind(), RuleKind::Search);
    }

    /// A message without a fix makes the rule a lint.
    #[test]
    fn lint_rule_has_message_no_fix() {
        let source = r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO"
        "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        assert_eq!(parsed.kind(), RuleKind::Lint);

        let matcher = parsed.rule.atomic().unwrap();
        assert_eq!(matcher, Some(AtomicMatcher::Regex("TODO".into())));
    }

    /// Two atomic keys on one node parse, but `atomic()` rejects them.
    #[test]
    fn rejects_multiple_matchers() {
        let source = r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "foo"
            regex = "bar"
        "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        let resolved = parsed.rule.atomic();
        assert!(resolved.is_err());
    }

    /// An empty `[rule]` table parses and is reported as empty.
    #[test]
    fn empty_matcher_is_detectable() {
        let source = r#"
            id = "x"
            language = "rust"
            [rule]
        "#;
        let parsed = Rule::from_toml_str(source).unwrap();

        // An empty node sets no atomic key (Ok(None)) and is flagged empty.
        assert_eq!(parsed.rule.atomic().unwrap(), None);
        assert!(parsed.rule.is_empty());
    }

    /// Nested relational (`inside`) and composite (`not`) tables parse,
    /// including the `stopBy` alias.
    #[test]
    fn parses_relational_and_composite_keys() {
        let source = r#"
            id = "nested"
            language = "typescript"
            [rule]
            pattern = "let $NAME = $INIT"
            [rule.inside]
            kind = "statement_block"
            stopBy = "end"
            [rule.not.inside]
            kind = "try_statement"
            stopBy = "end"
        "#;
        let parsed = Rule::from_toml_str(source).expect("parses");
        let node = &parsed.rule;

        assert!(node.inside.is_some());
        assert!(node.not.is_some());
        assert!(node.not.as_ref().unwrap().inside.is_some());
    }

    /// Unknown keys at the top level of a rule are a parse error.
    #[test]
    fn rejects_unknown_top_level_field() {
        let source = r#"
            id = "x"
            language = "rust"
            bogus = true
            [rule]
            kind = "foo"
        "#;
        let outcome = Rule::from_toml_str(source);

        assert!(outcome.is_err());
    }
}