harn_rules/model.rs
//! The declarative rule data model.
//!
//! A rule is the atomic unit the engine consumes: an identity (`id`,
//! `language`, `severity`, `message`), a `rule` block describing *what to
//! match*, and an optional `fix` describing *how to rewrite* it. The `rule`
//! block starts from the atomic tier (a `pattern` snippet, a `kind`, or a
//! `regex`); relational/composite matching (#2833) and `where`/`transform`
//! (#2834) extend that model and are defined in this module alongside the
//! atomic-tier surface they build on.
9
10use std::collections::BTreeMap;
11
12use serde::Deserialize;
13
14/// Diagnostic severity. Mirrors the `harn-lint` vocabulary so findings can
15/// flow into the same reporting surface.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
17#[serde(rename_all = "lowercase")]
18pub enum Severity {
19 /// Informational; no action required.
20 Info,
21 /// Default — something worth a human's attention.
22 #[default]
23 Warning,
24 /// A problem that should block.
25 Error,
26}
27
28/// How risky a rule's `fix` is, mapped onto Burin's edit-safety taxonomy.
29/// Ordered least → most dangerous; the codemod runner auto-applies only the
30/// two safest tiers (see [`Safety::applicability`]).
31#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Deserialize)]
32#[serde(rename_all = "kebab-case")]
33pub enum Safety {
34 /// Whitespace / formatting only.
35 FormatOnly,
36 /// Semantics-preserving rewrite.
37 BehaviorPreserving,
38 /// Changes behavior, but only within the matched scope. **Default** —
39 /// conservative, so an undeclared codemod does not silently auto-apply.
40 #[default]
41 ScopeLocal,
42 /// Changes an externally-visible surface (a signature, an export).
43 SurfaceChanging,
44 /// Changes capabilities / effects (I/O, permissions).
45 CapabilityChanging,
46 /// Always requires a human in the loop.
47 NeedsHuman,
48}
49
/// The two-way split between fixes that may be applied automatically
/// (clippy/ESLint `machine-applicable`) and fixes that are opt-in only
/// (`suggestion`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Applicability {
    /// May be auto-applied (`format-only` / `behavior-preserving`).
    MachineApplicable,
    /// Shown as a preview; applied only on explicit opt-in.
    Suggestion,
}
59
60impl Safety {
61 /// The applicability tier this safety level maps to. `format-only` and
62 /// `behavior-preserving` are machine-applicable; everything riskier is a
63 /// suggestion.
64 pub fn applicability(self) -> Applicability {
65 if self <= Safety::BehaviorPreserving {
66 Applicability::MachineApplicable
67 } else {
68 Applicability::Suggestion
69 }
70 }
71
72 /// True when the runner may auto-apply this rule's fix without an
73 /// explicit opt-in.
74 pub fn is_auto_applicable(self) -> bool {
75 self.applicability() == Applicability::MachineApplicable
76 }
77}
78
/// The flavor of work a rule performs. It is never declared by the rule
/// author; it is derived from the rule's shape: a rule with a `fix` is a
/// codemod, a rule with a `message` but no `fix` is a lint, and a bare
/// matcher is a search.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
    /// Find-only: matches are reported with no diagnostic text and no
    /// rewrite.
    Search,
    /// Emits a diagnostic (`message` + `severity`) but does not rewrite.
    Lint,
    /// Rewrites each match via `fix`.
    Codemod,
}
91
92/// The atomic-tier matcher. Exactly one of `pattern` / `kind` / `regex`
93/// must be set on a node that carries one; [`RuleNode::atomic`] resolves it.
94///
95/// A `RuleNode` is the recursive matching algebra: an optional **atomic**
96/// leaf (`pattern` / `kind` / `regex`), **relational** constraints
97/// (`inside` / `has` / `follows` / `precedes`, each a sub-node tuned by
98/// `stop_by` / `field`), and **composite** combinators (`all` / `any` /
99/// `not` / `matches`). Every key set on a node is ANDed: the node matches a
100/// tree-sitter node iff its atomic part matches *and* every relational and
101/// composite part holds.
102#[derive(Debug, Clone, Default, Deserialize)]
103pub struct RuleNode {
104 /// A code snippet in the target grammar with `$VAR` metavariable holes.
105 pub pattern: Option<String>,
106 /// A bare tree-sitter node kind (e.g. `"call_expression"`).
107 pub kind: Option<String>,
108 /// A regular expression matched against node text.
109 pub regex: Option<String>,
110
111 /// The node must be **inside** a node matching this sub-rule (ancestor).
112 pub inside: Option<Box<RuleNode>>,
113 /// The node must **have** a descendant matching this sub-rule.
114 pub has: Option<Box<RuleNode>>,
115 /// The node must **follow** a node matching this sub-rule (earlier).
116 pub follows: Option<Box<RuleNode>>,
117 /// The node must **precede** a node matching this sub-rule (later).
118 pub precedes: Option<Box<RuleNode>>,
119
120 /// Relational reach (used when this node is an `inside`/`has`/… target):
121 /// `neighbor` (direct only, default), `end` (transitive), or a rule that
122 /// halts the walk. (TOML `stopBy` or `stop_by`.)
123 #[serde(default, alias = "stopBy")]
124 pub stop_by: Option<StopBy>,
125 /// Restrict an `inside`/`has` relation to a specific tree-sitter field.
126 pub field: Option<String>,
127
128 /// Every sub-rule must match the node.
129 pub all: Option<Vec<RuleNode>>,
130 /// At least one sub-rule must match the node.
131 pub any: Option<Vec<RuleNode>>,
132 /// The sub-rule must NOT match the node.
133 pub not: Option<Box<RuleNode>>,
134 /// Reference a utility rule by id (resolved from `[utils]`).
135 pub matches: Option<String>,
136}
137
138/// How far a relational op (`inside` / `has` / `follows` / `precedes`)
139/// walks the tree looking for a match.
140#[derive(Debug, Clone, Deserialize)]
141#[serde(untagged)]
142pub enum StopBy {
143 /// `"neighbor"` (direct parent/child/sibling only) or `"end"`
144 /// (transitive — walk to the tree boundary).
145 Keyword(StopKeyword),
146 /// Walk until a node matching this rule is reached, then stop.
147 Rule(Box<RuleNode>),
148}
149
150/// The keyword forms of [`StopBy`].
151#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
152#[serde(rename_all = "lowercase")]
153pub enum StopKeyword {
154 /// Only the immediate neighbor (default).
155 Neighbor,
156 /// Transitive — walk all the way to the tree boundary.
157 End,
158}
159
/// A node's atomic matcher after resolution: exactly one of the three
/// atomic keys, carrying that key's text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomicMatcher {
    /// A code-snippet pattern containing metavariable holes.
    Pattern(String),
    /// The name of a tree-sitter node kind.
    Kind(String),
    /// A regular expression applied to node text.
    Regex(String),
}
170
171impl RuleNode {
172 /// Resolve this node's atomic leaf. `Ok(None)` when the node is purely
173 /// relational/composite; `Err` when more than one atomic key is set.
174 pub fn atomic(&self) -> Result<Option<AtomicMatcher>, String> {
175 let set: Vec<&str> = [
176 self.pattern.as_ref().map(|_| "pattern"),
177 self.kind.as_ref().map(|_| "kind"),
178 self.regex.as_ref().map(|_| "regex"),
179 ]
180 .into_iter()
181 .flatten()
182 .collect();
183 match set.as_slice() {
184 [] => Ok(None),
185 [one] => Ok(Some(match *one {
186 "pattern" => AtomicMatcher::Pattern(self.pattern.clone().unwrap()),
187 "kind" => AtomicMatcher::Kind(self.kind.clone().unwrap()),
188 _ => AtomicMatcher::Regex(self.regex.clone().unwrap()),
189 })),
190 many => Err(format!(
191 "rule node sets multiple atomic matchers ({}); set at most one",
192 many.join(", ")
193 )),
194 }
195 }
196
197 /// True when `regex` is the only key set — a top-level grep-style rule
198 /// that scans source text rather than the tree.
199 pub fn is_pure_regex(&self) -> bool {
200 self.regex.is_some()
201 && self.pattern.is_none()
202 && self.kind.is_none()
203 && self.inside.is_none()
204 && self.has.is_none()
205 && self.follows.is_none()
206 && self.precedes.is_none()
207 && self.all.is_none()
208 && self.any.is_none()
209 && self.not.is_none()
210 && self.matches.is_none()
211 }
212
213 /// True when the node sets no matching keys at all (an empty node, which
214 /// is a rule authoring error).
215 pub fn is_empty(&self) -> bool {
216 self.pattern.is_none()
217 && self.kind.is_none()
218 && self.regex.is_none()
219 && self.inside.is_none()
220 && self.has.is_none()
221 && self.follows.is_none()
222 && self.precedes.is_none()
223 && self.all.is_none()
224 && self.any.is_none()
225 && self.not.is_none()
226 && self.matches.is_none()
227 }
228}
229
230/// A single declarative rule.
231#[derive(Debug, Clone, Deserialize)]
232#[serde(deny_unknown_fields)]
233pub struct Rule {
234 /// Stable identifier (also the diagnostic code).
235 pub id: String,
236 /// Target language name (resolved via `harn_hostlib::ast::Language`).
237 pub language: String,
238 /// Diagnostic severity. Defaults to `warning`.
239 #[serde(default)]
240 pub severity: Severity,
241 /// Human-readable diagnostic message. Empty for search-only rules.
242 #[serde(default)]
243 pub message: String,
244 /// How risky the `fix` is. Gates auto-apply. Defaults to `scope-local`.
245 #[serde(default)]
246 pub safety: Safety,
247 /// The matcher block (atomic / relational / composite algebra).
248 pub rule: RuleNode,
249 /// Local utility rules referenced by `matches`, keyed by id.
250 /// (TOML `[utils.NAME]`.)
251 #[serde(default)]
252 pub utils: BTreeMap<String, RuleNode>,
253 /// Predicates on captured metavars; a match survives only when every
254 /// constraint holds. (TOML `[[where]]`.)
255 #[serde(default, rename = "where")]
256 pub where_constraints: Vec<Constraint>,
257 /// Derived metavars synthesized from captured ones before `fix`
258 /// interpolation, keyed by the new metavar name. (TOML `[transform.X]`.)
259 #[serde(default)]
260 pub transform: BTreeMap<String, Transform>,
261 /// Replacement template. Its presence makes the rule a codemod.
262 #[serde(default)]
263 pub fix: Option<String>,
264}
265
266/// A `where` predicate on a captured metavar. Exactly one of `regex` /
267/// `comparison` / `pattern` is set.
268#[derive(Debug, Clone, Deserialize)]
269#[serde(deny_unknown_fields)]
270pub struct Constraint {
271 /// The metavar this constraint applies to (without the leading `$`).
272 pub metavar: String,
273 /// The metavar's text must match this regex.
274 #[serde(default)]
275 pub regex: Option<String>,
276 /// The metavar's text, parsed as a number, must satisfy this
277 /// comparison (Semgrep `metavariable-comparison`).
278 #[serde(default)]
279 pub comparison: Option<Comparison>,
280 /// A sub-pattern (Semgrep `metavariable-pattern`) run against the
281 /// metavar's captured text; the constraint holds when it matches.
282 #[serde(default)]
283 pub pattern: Option<String>,
284 /// Optional language override for `pattern` — lets a captured string
285 /// literal be matched in a different grammar than the host file.
286 #[serde(default)]
287 pub language: Option<String>,
288}
289
290/// A numeric/string comparison for a [`Constraint`].
291#[derive(Debug, Clone, Deserialize)]
292#[serde(deny_unknown_fields)]
293pub struct Comparison {
294 /// One of `<` `<=` `>` `>=` `==` `!=`.
295 pub op: String,
296 /// The right-hand side. Numbers compare numerically; strings/bools
297 /// compare with `==` / `!=` only.
298 pub value: toml::Value,
299}
300
301/// A metavar transform: read `source`, apply exactly one operation, bind
302/// the result under a new metavar name (the map key).
303#[derive(Debug, Clone, Deserialize)]
304#[serde(deny_unknown_fields)]
305pub struct Transform {
306 /// The source metavar name (without `$`) whose text is transformed.
307 pub source: String,
308 /// Regex find/replace.
309 #[serde(default)]
310 pub replace: Option<ReplaceOp>,
311 /// A character-index slice.
312 #[serde(default)]
313 pub substring: Option<SubstringOp>,
314 /// A case conversion.
315 #[serde(default)]
316 pub convert: Option<ConvertOp>,
317}
318
319/// Regex find/replace transform op.
320#[derive(Debug, Clone, Deserialize)]
321#[serde(deny_unknown_fields)]
322pub struct ReplaceOp {
323 /// The regex to find.
324 pub regex: String,
325 /// The replacement (supports `$1` capture refs).
326 pub by: String,
327}
328
329/// Character-slice transform op. Indices are 0-based char offsets; a
330/// negative or omitted bound clamps to the string end.
331#[derive(Debug, Clone, Deserialize)]
332#[serde(deny_unknown_fields)]
333pub struct SubstringOp {
334 /// Inclusive start char index (default 0).
335 #[serde(default)]
336 pub start: Option<i64>,
337 /// Exclusive end char index (default: end of string).
338 #[serde(default)]
339 pub end: Option<i64>,
340}
341
342/// Case-conversion transform op.
343#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
344#[serde(rename_all = "snake_case")]
345pub enum ConvertOp {
346 /// `lowerCamelCase`.
347 LowerCamel,
348 /// `UpperCamelCase`.
349 UpperCamel,
350 /// `snake_case`.
351 Snake,
352 /// `SCREAMING_SNAKE_CASE`.
353 ScreamingSnake,
354 /// `kebab-case`.
355 Kebab,
356 /// `lowercase`.
357 Lower,
358 /// `UPPERCASE`.
359 Upper,
360}
361
362impl Rule {
363 /// Derive the rule's kind from its shape (see [`RuleKind`]).
364 pub fn kind(&self) -> RuleKind {
365 if self.fix.is_some() {
366 RuleKind::Codemod
367 } else if self.message.is_empty() {
368 RuleKind::Search
369 } else {
370 RuleKind::Lint
371 }
372 }
373
374 /// Parse a single rule from a TOML document.
375 pub fn from_toml_str(text: &str) -> Result<Self, Box<toml::de::Error>> {
376 toml::from_str(text).map_err(Box::new)
377 }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// A rule carrying a `fix` is a codemod, and its snippet surfaces as the
    /// atomic `Pattern` matcher.
    #[test]
    fn parses_a_codemod_rule() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "destructure-default"
            language = "typescript"
            severity = "warning"
            message = "Collapse optional-chain default into a destructuring bind"
            fix = "{ $KEY: $SRC }"

            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#,
        )
        .expect("rule parses");

        assert_eq!(parsed.id, "destructure-default");
        assert_eq!(parsed.language, "typescript");
        assert_eq!(parsed.severity, Severity::Warning);
        assert_eq!(parsed.kind(), RuleKind::Codemod);

        let matcher = parsed.rule.atomic().unwrap();
        assert_eq!(
            matcher,
            Some(AtomicMatcher::Pattern("$SRC?.$KEY ?? $DEFAULT".into()))
        );
    }

    /// Omitting `severity` yields `warning`; omitting both `message` and
    /// `fix` yields a search rule.
    #[test]
    fn severity_defaults_to_warning() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "macro_invocation"
            "#,
        )
        .unwrap();

        assert_eq!(parsed.severity, Severity::Warning);
        assert_eq!(parsed.kind(), RuleKind::Search);
    }

    /// A `message` without a `fix` makes the rule a lint.
    #[test]
    fn lint_rule_has_message_no_fix() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO"
            "#,
        )
        .unwrap();

        assert_eq!(parsed.kind(), RuleKind::Lint);

        let matcher = parsed.rule.atomic().unwrap();
        assert_eq!(matcher, Some(AtomicMatcher::Regex("TODO".into())));
    }

    /// Two atomic keys on one node parse fine but fail atomic resolution.
    #[test]
    fn rejects_multiple_matchers() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "foo"
            regex = "bar"
            "#,
        )
        .unwrap();

        let resolved = parsed.rule.atomic();
        assert!(resolved.is_err());
    }

    /// An empty `[rule]` table resolves to no atomic matcher (`Ok(None)`)
    /// and is reported as empty.
    #[test]
    fn empty_matcher_is_detectable() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            "#,
        )
        .unwrap();

        let node = &parsed.rule;
        assert_eq!(node.atomic().unwrap(), None);
        assert!(node.is_empty());
    }

    /// Nested relational (`inside`) and composite (`not`) tables land on
    /// the matching `RuleNode` fields.
    #[test]
    fn parses_relational_and_composite_keys() {
        let parsed = Rule::from_toml_str(
            r#"
            id = "nested"
            language = "typescript"
            [rule]
            pattern = "let $NAME = $INIT"
            [rule.inside]
            kind = "statement_block"
            stopBy = "end"
            [rule.not.inside]
            kind = "try_statement"
            stopBy = "end"
            "#,
        )
        .expect("parses");

        let node = &parsed.rule;
        assert!(node.inside.is_some());
        assert!(node.not.is_some());

        let negated = node.not.as_ref().unwrap();
        assert!(negated.inside.is_some());
    }

    /// `deny_unknown_fields` on `Rule` turns a stray top-level key into a
    /// parse error.
    #[test]
    fn rejects_unknown_top_level_field() {
        let outcome = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            bogus = true
            [rule]
            kind = "foo"
            "#,
        );

        assert!(outcome.is_err());
    }
}
509}