harn_rules/model.rs
//! The declarative rule data model.
//!
//! A rule is the atomic unit the engine consumes: an identity (`id`,
//! `language`, `severity`, `message`), a `rule` block describing *what to
//! match*, and an optional `fix` describing *how to rewrite* it. The `rule`
//! block starts from the atomic tier (a `pattern` snippet, a `kind`, or a
//! `regex`); relational/composite matching (#2833) and `where`/`transform`
//! (#2834) extend that model and are declared in this module as well.
9
10use std::collections::BTreeMap;
11
12use serde::Deserialize;
13
/// Diagnostic severity. Mirrors the `harn-lint` vocabulary so findings can
/// flow into the same reporting surface.
///
/// Deserialized from the lowercase variant names (`info`, `warning`,
/// `error`). `Warning` is the `Default`, so a rule that omits `severity`
/// is reported as a warning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    /// Informational; no action required.
    Info,
    /// Default — something worth a human's attention.
    #[default]
    Warning,
    /// A problem that should block.
    Error,
}
27
28impl Severity {
29 /// The stable lowercase name (the inverse of the `Deserialize` rename),
30 /// used for diagnostics and JSON surfaces.
31 pub fn as_str(self) -> &'static str {
32 match self {
33 Severity::Info => "info",
34 Severity::Warning => "warning",
35 Severity::Error => "error",
36 }
37 }
38}
39
/// How risky a rule's `fix` is, mapped onto Burin's edit-safety taxonomy.
/// Ordered least → most dangerous; the codemod runner auto-applies only the
/// two safest tiers (see [`Safety::applicability`]).
///
/// Deserialized from the kebab-case variant names (`format-only`,
/// `behavior-preserving`, `scope-local`, `surface-changing`,
/// `capability-changing`, `needs-human`).
///
/// The declaration order is significant: the derived `PartialOrd`/`Ord`
/// follow it, and [`Safety::applicability`] compares against
/// `BehaviorPreserving`. Do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Safety {
    /// Whitespace / formatting only.
    FormatOnly,
    /// Semantics-preserving rewrite.
    BehaviorPreserving,
    /// Changes behavior, but only within the matched scope. **Default** —
    /// conservative, so an undeclared codemod does not silently auto-apply.
    #[default]
    ScopeLocal,
    /// Changes an externally-visible surface (a signature, an export).
    SurfaceChanging,
    /// Changes capabilities / effects (I/O, permissions).
    CapabilityChanging,
    /// Always requires a human in the loop.
    NeedsHuman,
}
61
/// Whether a fix may be auto-applied (clippy/ESLint `machine-applicable`)
/// or is opt-in only (`suggestion`).
///
/// This type is not deserialized; it is derived from a [`Safety`] level via
/// [`Safety::applicability`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Applicability {
    /// Safe to auto-apply (`format-only` / `behavior-preserving`).
    MachineApplicable,
    /// Preview / opt-in only.
    Suggestion,
}
71
72impl Applicability {
73 /// The stable name used for diagnostics and JSON surfaces.
74 pub fn as_str(self) -> &'static str {
75 match self {
76 Applicability::MachineApplicable => "machine-applicable",
77 Applicability::Suggestion => "suggestion",
78 }
79 }
80}
81
82impl Safety {
83 /// The stable kebab-case name (the inverse of the `Deserialize` rename),
84 /// used for diagnostics and JSON surfaces.
85 pub fn as_str(self) -> &'static str {
86 match self {
87 Safety::FormatOnly => "format-only",
88 Safety::BehaviorPreserving => "behavior-preserving",
89 Safety::ScopeLocal => "scope-local",
90 Safety::SurfaceChanging => "surface-changing",
91 Safety::CapabilityChanging => "capability-changing",
92 Safety::NeedsHuman => "needs-human",
93 }
94 }
95
96 /// The applicability tier this safety level maps to. `format-only` and
97 /// `behavior-preserving` are machine-applicable; everything riskier is a
98 /// suggestion.
99 pub fn applicability(self) -> Applicability {
100 if self <= Safety::BehaviorPreserving {
101 Applicability::MachineApplicable
102 } else {
103 Applicability::Suggestion
104 }
105 }
106
107 /// True when the runner may auto-apply this rule's fix without an
108 /// explicit opt-in.
109 pub fn is_auto_applicable(self) -> bool {
110 self.applicability() == Applicability::MachineApplicable
111 }
112}
113
/// What flavor of work a rule performs, derived from its shape rather than
/// declared: a rule with a `fix` is a codemod; one with a `message` but no
/// `fix` is a lint; a bare matcher is a search.
///
/// Computed by [`Rule::kind`]; it is never read from the rule file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
    /// Find-only: report matches, no diagnostic text, no rewrite.
    Search,
    /// Report a diagnostic (`message` + `severity`), no rewrite.
    Lint,
    /// Rewrite matches via `fix`.
    Codemod,
}
126
/// One node of the matcher tree. At most one of the atomic keys
/// `pattern` / `kind` / `regex` may be set on a node; [`RuleNode::atomic`]
/// resolves it and rejects nodes that set more than one.
///
/// A `RuleNode` is the recursive matching algebra: an optional **atomic**
/// leaf (`pattern` / `kind` / `regex`), **relational** constraints
/// (`inside` / `has` / `follows` / `precedes`, each a sub-node tuned by
/// `stop_by` / `field`), and **composite** combinators (`all` / `any` /
/// `not` / `matches`). Every key set on a node is ANDed: the node matches a
/// tree-sitter node iff its atomic part matches *and* every relational and
/// composite part holds.
///
/// Every field is optional, so an empty table deserializes successfully;
/// use [`RuleNode::is_empty`] to detect that case.
///
/// NOTE(review): unlike `Rule`, this struct does not set
/// `#[serde(deny_unknown_fields)]`, so an unrecognized (e.g. misspelled) key
/// inside a rule node is ignored rather than rejected — confirm this is
/// intended.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct RuleNode {
    /// A code snippet in the target grammar with `$VAR` metavariable holes.
    pub pattern: Option<String>,
    /// A bare tree-sitter node kind (e.g. `"call_expression"`).
    pub kind: Option<String>,
    /// A regular expression matched against node text.
    pub regex: Option<String>,

    /// The node must be **inside** a node matching this sub-rule (ancestor).
    pub inside: Option<Box<RuleNode>>,
    /// The node must **have** a descendant matching this sub-rule.
    pub has: Option<Box<RuleNode>>,
    /// The node must **follow** a node matching this sub-rule (earlier).
    pub follows: Option<Box<RuleNode>>,
    /// The node must **precede** a node matching this sub-rule (later).
    pub precedes: Option<Box<RuleNode>>,

    /// Relational reach (used when this node is an `inside`/`has`/… target):
    /// `neighbor` (direct only, default), `end` (transitive), or a rule that
    /// halts the walk. (TOML `stopBy` or `stop_by`.)
    #[serde(default, alias = "stopBy")]
    pub stop_by: Option<StopBy>,
    /// Restrict an `inside`/`has` relation to a specific tree-sitter field.
    pub field: Option<String>,

    /// Every sub-rule must match the node.
    pub all: Option<Vec<RuleNode>>,
    /// At least one sub-rule must match the node.
    pub any: Option<Vec<RuleNode>>,
    /// The sub-rule must NOT match the node.
    pub not: Option<Box<RuleNode>>,
    /// Reference a utility rule by id (resolved from `[utils]`).
    pub matches: Option<String>,
}
172
/// How far a relational op (`inside` / `has` / `follows` / `precedes`)
/// walks the tree looking for a match.
///
/// The enum is `untagged`: serde tries the variants in declaration order, so
/// a string value is read as a [`StopKeyword`] and a table falls through to
/// a nested [`RuleNode`].
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum StopBy {
    /// `"neighbor"` (direct parent/child/sibling only) or `"end"`
    /// (transitive — walk to the tree boundary).
    Keyword(StopKeyword),
    /// Walk until a node matching this rule is reached, then stop.
    Rule(Box<RuleNode>),
}
184
/// The keyword forms of [`StopBy`].
///
/// Deserialized from the lowercase variant names (`neighbor`, `end`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StopKeyword {
    /// Only the immediate neighbor (default).
    Neighbor,
    /// Transitive — walk all the way to the tree boundary.
    End,
}
194
/// The resolved, exactly-one atomic matcher.
///
/// Produced by [`RuleNode::atomic`] from whichever single atomic key the
/// node sets; each variant owns a clone of that key's string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomicMatcher {
    /// A snippet pattern with metavariable holes.
    Pattern(String),
    /// A tree-sitter node kind.
    Kind(String),
    /// A regex over node text.
    Regex(String),
}
205
206impl RuleNode {
207 /// Resolve this node's atomic leaf. `Ok(None)` when the node is purely
208 /// relational/composite; `Err` when more than one atomic key is set.
209 pub fn atomic(&self) -> Result<Option<AtomicMatcher>, String> {
210 let set: Vec<&str> = [
211 self.pattern.as_ref().map(|_| "pattern"),
212 self.kind.as_ref().map(|_| "kind"),
213 self.regex.as_ref().map(|_| "regex"),
214 ]
215 .into_iter()
216 .flatten()
217 .collect();
218 match set.as_slice() {
219 [] => Ok(None),
220 [one] => Ok(Some(match *one {
221 "pattern" => AtomicMatcher::Pattern(self.pattern.clone().unwrap()),
222 "kind" => AtomicMatcher::Kind(self.kind.clone().unwrap()),
223 _ => AtomicMatcher::Regex(self.regex.clone().unwrap()),
224 })),
225 many => Err(format!(
226 "rule node sets multiple atomic matchers ({}); set at most one",
227 many.join(", ")
228 )),
229 }
230 }
231
232 /// True when `regex` is the only key set — a top-level grep-style rule
233 /// that scans source text rather than the tree.
234 pub fn is_pure_regex(&self) -> bool {
235 self.regex.is_some()
236 && self.pattern.is_none()
237 && self.kind.is_none()
238 && self.inside.is_none()
239 && self.has.is_none()
240 && self.follows.is_none()
241 && self.precedes.is_none()
242 && self.all.is_none()
243 && self.any.is_none()
244 && self.not.is_none()
245 && self.matches.is_none()
246 }
247
248 /// True when the node sets no matching keys at all (an empty node, which
249 /// is a rule authoring error).
250 pub fn is_empty(&self) -> bool {
251 self.pattern.is_none()
252 && self.kind.is_none()
253 && self.regex.is_none()
254 && self.inside.is_none()
255 && self.has.is_none()
256 && self.follows.is_none()
257 && self.precedes.is_none()
258 && self.all.is_none()
259 && self.any.is_none()
260 && self.not.is_none()
261 && self.matches.is_none()
262 }
263}
264
/// A single declarative rule.
///
/// Only `id`, `language` and the `rule` table are required; every other
/// field falls back to its `Default`. Unknown top-level keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Rule {
    /// Stable identifier (also the diagnostic code).
    pub id: String,
    /// Target language name (resolved via `harn_hostlib::ast::Language`).
    pub language: String,
    /// Diagnostic severity. Defaults to `warning`.
    #[serde(default)]
    pub severity: Severity,
    /// Human-readable diagnostic message. Empty for search-only rules.
    #[serde(default)]
    pub message: String,
    /// How risky the `fix` is. Gates auto-apply. Defaults to `scope-local`.
    #[serde(default)]
    pub safety: Safety,
    /// The matcher block (atomic / relational / composite algebra).
    pub rule: RuleNode,
    /// Local utility rules referenced by `matches`, keyed by id.
    /// (TOML `[utils.NAME]`.)
    #[serde(default)]
    pub utils: BTreeMap<String, RuleNode>,
    /// Predicates on captured metavars; a match survives only when every
    /// constraint holds. (TOML `[[where]]`.)
    #[serde(default, rename = "where")]
    pub where_constraints: Vec<Constraint>,
    /// Derived metavars synthesized from captured ones before `fix`
    /// interpolation, keyed by the new metavar name. (TOML `[transform.X]`.)
    #[serde(default)]
    pub transform: BTreeMap<String, Transform>,
    /// Replacement template. Its presence makes the rule a codemod.
    #[serde(default)]
    pub fix: Option<String>,
}
300
/// A `where` predicate on a captured metavar. Exactly one of `regex` /
/// `comparison` / `pattern` / `resolves_to` / `type` is expected to be set.
///
/// NOTE(review): this struct does not enforce the "exactly one" rule — all
/// five predicates are independent optional fields — so it is presumably
/// validated by whatever consumes the constraint; confirm against the caller.
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Constraint {
    /// The metavar this constraint applies to (without the leading `$`).
    pub metavar: String,
    /// The metavar's text must match this regex.
    #[serde(default)]
    pub regex: Option<String>,
    /// The metavar's text, parsed as a number, must satisfy this
    /// comparison (Semgrep `metavariable-comparison`).
    #[serde(default)]
    pub comparison: Option<Comparison>,
    /// A sub-pattern (Semgrep `metavariable-pattern`) run against the
    /// metavar's captured text; the constraint holds when it matches.
    #[serde(default)]
    pub pattern: Option<String>,
    /// Harn-only semantic filter: the captured node must resolve to a
    /// declaration/binding matching this identity.
    /// (TOML `resolves_to` or `resolvesTo`.)
    #[serde(default, alias = "resolvesTo")]
    pub resolves_to: Option<ResolvedBindingConstraint>,
    /// Harn-only semantic filter: the capture's attributed type must equal
    /// this label. (TOML key `type`; the field carries a trailing underscore
    /// because `type` is a Rust keyword.)
    #[serde(default, rename = "type")]
    pub type_: Option<String>,
    /// Optional language override for `pattern` — lets a captured string
    /// literal be matched in a different grammar than the host file.
    #[serde(default)]
    pub language: Option<String>,
}
332
/// A Harn resolved-binding predicate for [`Constraint::resolves_to`].
///
/// Every field is optional, so a rule may pin the binding by exact `id` or
/// by any combination of `name` / `kind` / `line` / `column`. Unknown keys
/// are rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ResolvedBindingConstraint {
    /// Exact resolved id (`<kind>:<name>@<line>:<column>`), if supplied.
    #[serde(default)]
    pub id: Option<String>,
    /// Binding/declaration name.
    #[serde(default)]
    pub name: Option<String>,
    /// Binding kind (`fn`, `param`, `let`, ...).
    #[serde(default)]
    pub kind: Option<String>,
    /// 1-based declaration/binding line.
    #[serde(default)]
    pub line: Option<usize>,
    /// 1-based declaration/binding column.
    #[serde(default)]
    pub column: Option<usize>,
}
353
/// A numeric/string comparison for a [`Constraint`].
///
/// Both fields are required; unknown keys are rejected
/// (`deny_unknown_fields`). The operator is kept as a raw string here, so
/// it is not validated at deserialization time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Comparison {
    /// One of `<` `<=` `>` `>=` `==` `!=`.
    pub op: String,
    /// The right-hand side. Numbers compare numerically; strings/bools
    /// compare with `==` / `!=` only.
    pub value: toml::Value,
}
364
/// A metavar transform: read `source`, apply exactly one operation, bind
/// the result under a new metavar name (the map key).
///
/// NOTE(review): the three operations are independent optional fields, so
/// the "exactly one" rule is not enforced by this struct — presumably it is
/// checked by the consumer; confirm against the caller. Unknown keys are
/// rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Transform {
    /// The source metavar name (without `$`) whose text is transformed.
    pub source: String,
    /// Regex find/replace.
    #[serde(default)]
    pub replace: Option<ReplaceOp>,
    /// A character-index slice.
    #[serde(default)]
    pub substring: Option<SubstringOp>,
    /// A case conversion.
    #[serde(default)]
    pub convert: Option<ConvertOp>,
}
382
/// Regex find/replace transform op.
///
/// Both fields are required; unknown keys are rejected
/// (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ReplaceOp {
    /// The regex to find.
    pub regex: String,
    /// The replacement (supports `$1` capture refs).
    pub by: String,
}
392
/// Character-slice transform op. Indices are 0-based char offsets; a
/// negative or omitted bound clamps to the string end.
///
/// This struct only carries the two optional bounds; unknown keys are
/// rejected (`deny_unknown_fields`).
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SubstringOp {
    /// Inclusive start char index (default 0).
    #[serde(default)]
    pub start: Option<i64>,
    /// Exclusive end char index (default: end of string).
    #[serde(default)]
    pub end: Option<i64>,
}
405
/// Case-conversion transform op.
///
/// Deserialized from the snake_case variant names (`lower_camel`,
/// `upper_camel`, `snake`, `screaming_snake`, `kebab`, `lower`, `upper`).
/// Each variant's doc shows the casing it produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConvertOp {
    /// `lowerCamelCase`.
    LowerCamel,
    /// `UpperCamelCase`.
    UpperCamel,
    /// `snake_case`.
    Snake,
    /// `SCREAMING_SNAKE_CASE`.
    ScreamingSnake,
    /// `kebab-case`.
    Kebab,
    /// `lowercase`.
    Lower,
    /// `UPPERCASE`.
    Upper,
}
425
426impl Rule {
427 /// Derive the rule's kind from its shape (see [`RuleKind`]).
428 pub fn kind(&self) -> RuleKind {
429 if self.fix.is_some() {
430 RuleKind::Codemod
431 } else if self.message.is_empty() {
432 RuleKind::Search
433 } else {
434 RuleKind::Lint
435 }
436 }
437
438 /// Parse a single rule from a TOML document.
439 pub fn from_toml_str(text: &str) -> Result<Self, Box<toml::de::Error>> {
440 toml::from_str(text).map_err(Box::new)
441 }
442}
443
/// Unit tests for parsing rules from TOML and for the shape-derived
/// helpers (`Rule::kind`, `RuleNode::atomic`, `RuleNode::is_empty`).
#[cfg(test)]
mod tests {
    use super::*;

    /// A rule with a `fix` parses, keeps its identity fields, and is
    /// classified as a codemod with a `pattern` atomic matcher.
    #[test]
    fn parses_a_codemod_rule() {
        let rule = Rule::from_toml_str(
            r#"
            id = "destructure-default"
            language = "typescript"
            severity = "warning"
            message = "Collapse optional-chain default into a destructuring bind"
            fix = "{ $KEY: $SRC }"

            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#,
        )
        .expect("rule parses");
        assert_eq!(rule.id, "destructure-default");
        assert_eq!(rule.language, "typescript");
        assert_eq!(rule.severity, Severity::Warning);
        assert_eq!(rule.kind(), RuleKind::Codemod);
        assert_eq!(
            rule.rule.atomic().unwrap(),
            Some(AtomicMatcher::Pattern("$SRC?.$KEY ?? $DEFAULT".into()))
        );
    }

    /// Omitting `severity` yields `Warning`; omitting `message` and `fix`
    /// makes the rule a search.
    #[test]
    fn severity_defaults_to_warning() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "macro_invocation"
            "#,
        )
        .unwrap();
        assert_eq!(rule.severity, Severity::Warning);
        // No message, no fix -> a search rule.
        assert_eq!(rule.kind(), RuleKind::Search);
    }

    /// A message without a fix is a lint; a lone `regex` resolves to the
    /// `Regex` atomic matcher.
    #[test]
    fn lint_rule_has_message_no_fix() {
        let rule = Rule::from_toml_str(
            r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO"
            "#,
        )
        .unwrap();
        assert_eq!(rule.kind(), RuleKind::Lint);
        assert_eq!(
            rule.rule.atomic().unwrap(),
            Some(AtomicMatcher::Regex("TODO".into()))
        );
    }

    /// Setting two atomic keys still parses, but `atomic()` reports an error.
    #[test]
    fn rejects_multiple_matchers() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            kind = "foo"
            regex = "bar"
            "#,
        )
        .unwrap();
        assert!(rule.rule.atomic().is_err());
    }

    /// An empty `[rule]` table parses and is detectable via `is_empty()`.
    #[test]
    fn empty_matcher_is_detectable() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            [rule]
            "#,
        )
        .unwrap();
        // An empty node sets no atomic key (Ok(None)) and is flagged empty.
        assert_eq!(rule.rule.atomic().unwrap(), None);
        assert!(rule.rule.is_empty());
    }

    /// Nested relational (`inside`) and composite (`not`) tables parse,
    /// including the camelCase `stopBy` alias.
    #[test]
    fn parses_relational_and_composite_keys() {
        let rule = Rule::from_toml_str(
            r#"
            id = "nested"
            language = "typescript"
            [rule]
            pattern = "let $NAME = $INIT"
            [rule.inside]
            kind = "statement_block"
            stopBy = "end"
            [rule.not.inside]
            kind = "try_statement"
            stopBy = "end"
            "#,
        )
        .expect("parses");
        assert!(rule.rule.inside.is_some());
        assert!(rule.rule.not.is_some());
        assert!(rule.rule.not.as_ref().unwrap().inside.is_some());
    }

    /// `Rule` denies unknown fields, so a stray top-level key fails parsing.
    #[test]
    fn rejects_unknown_top_level_field() {
        let err = Rule::from_toml_str(
            r#"
            id = "x"
            language = "rust"
            bogus = true
            [rule]
            kind = "foo"
            "#,
        );
        assert!(err.is_err());
    }

    /// `[[where]]` entries parse the semantic filters: `resolvesTo` (via
    /// its camelCase alias) and `type` (renamed to the `type_` field).
    #[test]
    fn parses_semantic_where_constraints() {
        let rule = Rule::from_toml_str(
            r#"
            id = "x"
            language = "harn"
            [rule]
            pattern = "$FN($ARG)"

            [[where]]
            metavar = "FN"
            resolvesTo = { name = "target", kind = "fn", line = 1, column = 4 }

            [[where]]
            metavar = "ARG"
            type = "int"
            "#,
        )
        .unwrap();
        assert_eq!(rule.where_constraints.len(), 2);
        let resolved = rule.where_constraints[0].resolves_to.as_ref().unwrap();
        assert_eq!(resolved.name.as_deref(), Some("target"));
        assert_eq!(resolved.kind.as_deref(), Some("fn"));
        assert_eq!(resolved.line, Some(1));
        assert_eq!(resolved.column, Some(4));
        assert_eq!(rule.where_constraints[1].type_.as_deref(), Some("int"));
    }
}