1use std::collections::BTreeMap;
12
13use harn_hostlib::ast::Language;
14use serde::Serialize;
15
16use crate::constraint::CompiledConstraint;
17use crate::error::RulesError;
18use crate::evaluator::CompiledRuleTree;
19use crate::fix::{interpolate, splice, AppliedEdit};
20use crate::model::{Applicability, Rule, Safety, Severity};
21use crate::semantic::enrich_harn_matches;
22use crate::transform::CompiledTransform;
23
/// Location of a match or binding in the source text: a byte range plus the
/// row/column of each endpoint.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub struct Span {
    /// Byte offset at which the range starts.
    pub start_byte: usize,
    /// Byte offset at which the range ends.
    pub end_byte: usize,
    /// Zero-based row of the start position.
    pub start_row: usize,
    /// Column of the start position within `start_row`.
    pub start_col: usize,
    /// Zero-based row of the end position.
    pub end_row: usize,
    /// Column of the end position within `end_row`.
    pub end_col: usize,
}
41
42impl Span {
43 pub(crate) fn of(node: tree_sitter::Node<'_>) -> Self {
44 let start = node.start_position();
45 let end = node.end_position();
46 Span {
47 start_byte: node.start_byte(),
48 end_byte: node.end_byte(),
49 start_row: start.row,
50 start_col: start.column,
51 end_row: end.row,
52 end_col: end.column,
53 }
54 }
55}
56
/// Optional semantic facts attached to a [`Binding`].
///
/// Each field is left out of the serialized output when it is `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)]
pub struct BindingMetadata {
    /// Declaration the bound text resolves to, when one is known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resolved: Option<ResolvedBinding>,
    /// Type of the bound text, serialized under the key `type`.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub ty: Option<String>,
}
69
70impl BindingMetadata {
71 pub fn is_empty(&self) -> bool {
73 self.resolved.is_none() && self.ty.is_none()
74 }
75}
76
/// Declaration that a metavariable binding resolves to.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct ResolvedBinding {
    /// Identifier of the resolved declaration.
    // NOTE(review): the id format is produced outside this file — confirm before relying on it.
    pub id: String,
    /// Name of the declaration.
    pub name: String,
    /// Declaration kind; the tests in this file observe `"fn"` and `"let"`.
    pub kind: String,
    /// Span of the declaration; its fields are flattened into this struct on serialization.
    #[serde(flatten)]
    pub span: Span,
}
91
/// Text captured for one metavariable, with its location and any semantic metadata.
#[derive(Debug, Clone)]
pub struct Binding {
    /// Source text captured by the metavariable.
    pub text: String,
    /// Where the captured text sits in the source.
    pub span: Span,
    /// Semantic metadata; starts out as `BindingMetadata::default()` in `Binding::new`.
    pub metadata: BindingMetadata,
}
102
103impl Binding {
104 pub(crate) fn new(text: String, span: Span) -> Self {
105 Binding {
106 text,
107 span,
108 metadata: BindingMetadata::default(),
109 }
110 }
111}
112
/// One place in the source where a rule matched.
#[derive(Debug, Clone)]
pub struct RuleMatch {
    /// Id of the rule that produced this match.
    pub rule_id: String,
    /// Range of source covered by the match.
    pub span: Span,
    /// Matched source text.
    pub text: String,
    /// Metavariable bindings keyed by name; empty when the rule captures
    /// nothing (always the case for regex rules).
    pub bindings: BTreeMap<String, Binding>,
}
126
/// Outcome of applying a rule's `fix` template to a source text.
#[derive(Debug, Clone)]
pub struct CodemodResult {
    /// Source text after all edits were spliced in.
    pub rewritten: String,
    /// Edits that were applied, one per retained match.
    pub edits: Vec<AppliedEdit>,
    /// `true` when `rewritten` differs from the input source.
    pub changed: bool,
    /// Safety level copied from the rule.
    pub safety: Safety,
    /// Applicability derived from the rule's safety level.
    pub applicability: Applicability,
    /// `true` when rewriting `rewritten` a second time leaves it unchanged.
    pub idempotent: bool,
}
144
/// A [`Rule`] compiled into a form that can be run against source text.
pub struct CompiledRule {
    /// Id copied from the source rule; stamped on matches, diagnostics and errors.
    rule_id: String,
    /// Language the rule targets.
    language: Language,
    /// How matches are found: a source-level regex or a compiled rule tree.
    execution: Execution,
    /// Compiled `where` constraints; a match must satisfy all of them.
    constraints: Vec<CompiledConstraint>,
    /// Compiled transforms as `(name, transform)` pairs, in the order the rule yields them.
    transforms: Vec<(String, CompiledTransform)>,
    /// Replacement template; `None` for rules that carry no fix.
    fix: Option<String>,
    /// Safety level declared by the rule.
    safety: Safety,
    /// Message reported with each diagnostic.
    message: String,
    /// Severity reported with each diagnostic.
    severity: Severity,
}
163
/// A reportable finding produced from a single rule match.
#[derive(Debug, Clone)]
pub struct Diagnostic {
    /// Id of the rule that produced the finding.
    pub rule_id: String,
    /// The rule's message.
    pub message: String,
    /// The rule's severity.
    pub severity: Severity,
    /// Range of source covered by the underlying match.
    pub span: Span,
    /// Applicability derived from the rule's safety level.
    pub applicability: Applicability,
    /// Interpolated replacement text for this match, or `None` when the rule has no fix.
    pub fix: Option<String>,
}
182
/// Strategy a compiled rule uses to find matches.
enum Execution {
    /// Regex run directly over the source text; chosen when the rule is a pure regex.
    SourceRegex(regex::Regex),
    /// Compiled rule tree searched via `CompiledRuleTree::find`; stored behind a `Box`.
    Tree(Box<CompiledRuleTree>),
}
190
191impl CompiledRule {
192 pub fn compile(rule: &Rule) -> Result<Self, RulesError> {
194 let language =
195 Language::from_name(&rule.language).ok_or_else(|| RulesError::UnknownLanguage {
196 rule: rule.id.clone(),
197 language: rule.language.clone(),
198 })?;
199
200 let execution = if rule.rule.is_pure_regex() {
204 let pattern = rule.rule.regex.as_ref().expect("pure regex");
205 Execution::SourceRegex(regex::Regex::new(pattern).map_err(|err| {
206 RulesError::PatternCompile {
207 rule: rule.id.clone(),
208 message: format!("invalid regex `{pattern}`: {err}"),
209 }
210 })?)
211 } else {
212 Execution::Tree(Box::new(CompiledRuleTree::compile(
213 &rule.id,
214 language,
215 &rule.rule,
216 &rule.utils,
217 )?))
218 };
219
220 let constraints = rule
221 .where_constraints
222 .iter()
223 .map(|c| CompiledConstraint::compile(&rule.id, language, c))
224 .collect::<Result<Vec<_>, _>>()?;
225
226 let transforms = rule
227 .transform
228 .iter()
229 .map(|(name, t)| {
230 CompiledTransform::compile(&rule.id, name, t).map(|c| (name.clone(), c))
231 })
232 .collect::<Result<Vec<_>, _>>()?;
233
234 Ok(CompiledRule {
235 rule_id: rule.id.clone(),
236 language,
237 execution,
238 constraints,
239 transforms,
240 fix: rule.fix.clone(),
241 safety: rule.safety,
242 message: rule.message.clone(),
243 severity: rule.severity,
244 })
245 }
246
/// Language this rule was compiled for.
pub fn language(&self) -> Language {
    self.language
}
251
/// Safety level declared by the rule.
pub fn safety(&self) -> Safety {
    self.safety
}
256
/// Applicability of the rule's fix, derived from its safety level via
/// `Safety::applicability`.
pub fn applicability(&self) -> Applicability {
    self.safety.applicability()
}
262
/// Id of the rule.
pub fn id(&self) -> &str {
    &self.rule_id
}
267
/// Severity reported with this rule's diagnostics.
pub fn severity(&self) -> Severity {
    self.severity
}
273
/// Message reported with this rule's diagnostics.
pub fn message(&self) -> &str {
    &self.message
}
278
279 pub fn run(&self, source: &str) -> Result<Vec<RuleMatch>, RulesError> {
282 let mut matches = match &self.execution {
283 Execution::SourceRegex(regex) => self.run_regex(regex, source),
284 Execution::Tree(tree) => tree
285 .find(&self.rule_id, self.language, source)?
286 .into_iter()
287 .map(|m| RuleMatch {
288 rule_id: self.rule_id.clone(),
289 span: m.span,
290 text: m.text,
291 bindings: m.bindings,
292 })
293 .collect(),
294 };
295 if self.language == Language::Harn && !matches.is_empty() {
296 enrich_harn_matches(source, &mut matches).map_err(|message| {
297 RulesError::SourceParse {
298 rule: self.rule_id.clone(),
299 message,
300 }
301 })?;
302 }
303 if !self.constraints.is_empty() {
304 matches.retain(|m| self.satisfies_constraints(m));
305 }
306 Ok(matches)
307 }
308
309 fn satisfies_constraints(&self, m: &RuleMatch) -> bool {
312 self.constraints
313 .iter()
314 .all(|c| m.bindings.get(&c.metavar).is_some_and(|b| c.evaluate(b)))
315 }
316
317 pub fn apply(&self, source: &str) -> Result<CodemodResult, RulesError> {
326 let (rewritten, edits) = self.rewrite(source)?;
327 let changed = rewritten != source;
328 let (twice, _) = self.rewrite(&rewritten)?;
331 let idempotent = twice == rewritten;
332 Ok(CodemodResult {
333 rewritten,
334 edits,
335 changed,
336 safety: self.safety,
337 applicability: self.applicability(),
338 idempotent,
339 })
340 }
341
342 pub fn auto_apply(&self, source: &str) -> Result<CodemodResult, RulesError> {
346 if !self.safety.is_auto_applicable() {
347 return Err(RulesError::NotAutoApplicable {
348 rule: self.rule_id.clone(),
349 safety: format!("{:?}", self.safety),
350 });
351 }
352 self.apply(source)
353 }
354
355 pub fn apply_checked(&self, source: &str) -> Result<CodemodResult, RulesError> {
359 let result = self.apply(source)?;
360 if !result.idempotent {
361 return Err(RulesError::NotIdempotent {
362 rule: self.rule_id.clone(),
363 });
364 }
365 Ok(result)
366 }
367
368 pub fn diagnostics(&self, source: &str) -> Result<Vec<Diagnostic>, RulesError> {
373 let applicability = self.applicability();
374 let matches = self.run(source)?;
375 Ok(matches
376 .iter()
377 .map(|m| Diagnostic {
378 rule_id: self.rule_id.clone(),
379 message: self.message.clone(),
380 severity: self.severity,
381 span: m.span,
382 applicability,
383 fix: self.fix.as_ref().map(|template| {
384 let vars = self.metavars_for(m);
385 interpolate(template, &vars)
386 }),
387 })
388 .collect())
389 }
390
391 fn rewrite(&self, source: &str) -> Result<(String, Vec<AppliedEdit>), RulesError> {
394 let template = self
395 .fix
396 .as_ref()
397 .ok_or_else(|| RulesError::PatternCompile {
398 rule: self.rule_id.clone(),
399 message: "apply requires a `fix` template; this rule has none".into(),
400 })?;
401
402 let matches = dedupe_overlapping(self.run(source)?);
403 let edits: Vec<AppliedEdit> = matches
404 .iter()
405 .map(|m| {
406 let vars = self.metavars_for(m);
407 AppliedEdit {
408 span: m.span,
409 before: m.text.clone(),
410 replacement: interpolate(template, &vars),
411 }
412 })
413 .collect();
414 Ok((splice(source, &edits), edits))
415 }
416
417 fn metavars_for(&self, m: &RuleMatch) -> BTreeMap<String, String> {
420 let mut vars: BTreeMap<String, String> = m
421 .bindings
422 .iter()
423 .map(|(name, binding)| (name.clone(), binding.text.clone()))
424 .collect();
425 for (name, transform) in &self.transforms {
426 let input = m
427 .bindings
428 .get(&transform.source)
429 .map(|b| b.text.as_str())
430 .unwrap_or("");
431 vars.insert(name.clone(), transform.apply(input));
432 }
433 vars
434 }
435
436 fn run_regex(&self, regex: ®ex::Regex, source: &str) -> Vec<RuleMatch> {
437 let mut matches = Vec::new();
438 let mut cursor = RowColCursor::new(source);
443 for m in regex.find_iter(source) {
444 let (start_row, start_col) = cursor.advance_to(m.start());
445 let (end_row, end_col) = cursor.advance_to(m.end());
446 matches.push(RuleMatch {
447 rule_id: self.rule_id.clone(),
448 span: Span {
449 start_byte: m.start(),
450 end_byte: m.end(),
451 start_row,
452 start_col,
453 end_row,
454 end_col,
455 },
456 text: m.as_str().to_string(),
457 bindings: BTreeMap::new(),
458 });
459 }
460 matches
461 }
462}
463
464fn dedupe_overlapping(mut matches: Vec<RuleMatch>) -> Vec<RuleMatch> {
472 matches.sort_by(|a, b| {
475 a.span
476 .start_byte
477 .cmp(&b.span.start_byte)
478 .then(b.span.end_byte.cmp(&a.span.end_byte))
479 });
480 let mut kept: Vec<RuleMatch> = Vec::with_capacity(matches.len());
481 let mut covered_to = 0usize; for m in matches {
483 if m.span.start_byte >= covered_to {
487 covered_to = m.span.end_byte.max(covered_to);
488 kept.push(m);
489 }
490 }
491 kept
492}
493
/// Forward-only cursor that converts byte offsets in `source` to row/column pairs.
///
/// It remembers the last offset it resolved, so a run of increasing offsets
/// is resolved in a single pass over the text.
struct RowColCursor<'a> {
    /// Text being scanned.
    source: &'a str,
    /// Byte offset already accounted for by `row`/`col`.
    byte: usize,
    /// Zero-based row at `byte`.
    row: usize,
    /// Number of `char`s between the last newline (or the start of the text) and `byte`.
    col: usize,
}

impl<'a> RowColCursor<'a> {
    /// Creates a cursor positioned at the start of `source`.
    fn new(source: &'a str) -> Self {
        Self {
            source,
            byte: 0,
            row: 0,
            col: 0,
        }
    }

    /// Advances to byte offset `target` and returns the `(row, col)` there.
    ///
    /// Each `'\n'` starts a new row at column 0; every other `char` advances
    /// the column by one. `target` must not be behind the cursor and must be
    /// a valid slice boundary in `source`; otherwise the slice below panics.
    fn advance_to(&mut self, target: usize) -> (usize, usize) {
        let skipped = &self.source[self.byte..target];
        let (row, col) = skipped
            .chars()
            .fold((self.row, self.col), |(row, col), ch| match ch {
                '\n' => (row + 1, 0),
                _ => (row, col + 1),
            });
        self.row = row;
        self.col = col;
        self.byte = target;
        (row, col)
    }
}
531
532#[cfg(test)]
533mod tests {
534 use super::*;
535 use crate::model::Rule;
536
/// Test helper: parses `toml` into a `Rule` and compiles it, panicking with
/// a descriptive message if either step fails.
fn rule(toml: &str) -> CompiledRule {
    let parsed = Rule::from_toml_str(toml).expect("rule parses");
    CompiledRule::compile(&parsed).expect("rule compiles")
}
541
/// A pattern rule yields one match per occurrence and binds each
/// metavariable to the text it captured.
#[test]
fn pattern_rule_binds_metavars() {
    let compiled = rule(
        r#"
            id = "destructure-default"
            language = "typescript"
            fix = "{ $KEY: $SRC }"
            [rule]
            pattern = "$SRC?.$KEY ?? $DEFAULT"
            "#,
    );
    let matches = compiled
        .run("const a = cfg?.timeout ?? 30;\nconst b = opts?.retries ?? 3;\n")
        .unwrap();
    assert_eq!(matches.len(), 2);
    assert_eq!(matches[0].bindings["SRC"].text, "cfg");
    assert_eq!(matches[0].bindings["KEY"].text, "timeout");
    assert_eq!(matches[0].bindings["DEFAULT"].text, "30");
    assert_eq!(matches[1].bindings["SRC"].text, "opts");
    assert_eq!(matches[0].text, "cfg?.timeout ?? 30");
    // Rows are zero-based: the two statements sit on rows 0 and 1.
    assert_eq!(matches[0].span.start_row, 0);
    assert_eq!(matches[1].span.start_row, 1);
}
566
/// When a pattern matches both an expression and a sub-expression nested
/// inside it, `apply` rewrites only the outermost match.
#[test]
fn nested_matches_do_not_corrupt_or_panic_on_apply() {
    let compiled = rule(
        r#"
            id = "sum-binop"
            language = "typescript"
            fix = "sum($X, $Y)"
            [rule]
            pattern = "$X + $Y"
            "#,
    );
    // `run` reports the nested matches as well as the outer one.
    assert!(compiled.run("const z = a + b + c;\n").unwrap().len() >= 2);
    let result = compiled.apply("const z = a + b + c;\n").unwrap();
    // Only the outer `(a + b) + c` is rewritten; the inner `a + b` is left intact.
    assert_eq!(result.rewritten, "const z = sum(a + b, c);\n");
    assert_eq!(result.edits.len(), 1);
    assert!(result.changed);
}
590
/// Of two matches starting at the same byte, the longer one is kept; a
/// later non-overlapping match is kept too, in document order.
#[test]
fn dedupe_overlapping_keeps_outermost_in_document_order() {
    let span = |s: usize, e: usize| Span {
        start_byte: s,
        end_byte: e,
        start_row: 0,
        start_col: s,
        end_row: 0,
        end_col: e,
    };
    let m = |s: usize, e: usize| RuleMatch {
        rule_id: "r".into(),
        span: span(s, e),
        text: String::new(),
        bindings: BTreeMap::new(),
    };
    let kept = dedupe_overlapping(vec![m(0, 5), m(0, 9), m(10, 14)]);
    // (0, 5) lies inside (0, 9) and is dropped.
    let spans: Vec<_> = kept
        .iter()
        .map(|m| (m.span.start_byte, m.span.end_byte))
        .collect();
    assert_eq!(spans, vec![(0, 9), (10, 14)]);
}
615
/// A `kind` rule matches every node of that kind and binds no metavariables.
#[test]
fn kind_rule_matches_node_kind() {
    let compiled = rule(
        r#"
            id = "find-calls"
            language = "python"
            [rule]
            kind = "call"
            "#,
    );
    let matches = compiled.run("print(x)\nlog(y)\n").unwrap();
    assert_eq!(matches.len(), 2);
    assert_eq!(matches[0].text, "print(x)");
    assert!(matches[0].bindings.is_empty());
}
631
/// A regex rule matches raw source text and reports the row of the match.
#[test]
fn regex_rule_matches_text() {
    let compiled = rule(
        r#"
            id = "todo"
            language = "rust"
            message = "Found a TODO"
            [rule]
            regex = "TODO\\(\\w+\\)"
            "#,
    );
    let matches = compiled
        .run("fn f() {\n // TODO(ken) fix\n // todo lower\n}\n")
        .unwrap();
    // The lowercase `todo` without parentheses does not match.
    assert_eq!(matches.len(), 1);
    assert_eq!(matches[0].text, "TODO(ken)");
    assert_eq!(matches[0].span.start_row, 1);
}
650
/// Compiling a rule whose language is not recognised fails with
/// `RulesError::UnknownLanguage`.
#[test]
fn unknown_language_is_an_error() {
    let parsed = Rule::from_toml_str(
        r#"
            id = "x"
            language = "cobol"
            [rule]
            kind = "foo"
            "#,
    )
    .unwrap();
    assert!(matches!(
        CompiledRule::compile(&parsed),
        Err(RulesError::UnknownLanguage { .. })
    ));
}
667
/// A pattern that the rule compiler rejects surfaces as
/// `RulesError::PatternCompile` rather than a panic.
#[test]
fn invalid_pattern_surfaces_compile_error() {
    let parsed = Rule::from_toml_str(
        r#"
            id = "x"
            language = "typescript"
            [rule]
            pattern = "foo($$$ARGS)"
            "#,
    )
    .unwrap();
    assert!(matches!(
        CompiledRule::compile(&parsed),
        Err(RulesError::PatternCompile { .. })
    ));
}
684
/// A `resolvesTo` constraint separates call sites that share a name: only
/// the call that resolves to the top-level `fn target` is kept, not the one
/// that resolves to the shadowing parameter.
#[test]
fn harn_resolves_same_named_call_sites_by_binding_identity() {
    let compiled = rule(
        r#"
            id = "top-level-target"
            language = "harn"
            [rule]
            pattern = "$FN($ARG)"

            [[where]]
            metavar = "FN"
            resolvesTo = { name = "target", kind = "fn", line = 1 }
            "#,
    );
    let source = r"fn target(value: int) -> int {
    return value
}

fn call_shadowed(target: fn(int) -> int) {
    target(1)
}

fn call_global() {
    target(2)
}
";
    let matches = compiled.run(source).unwrap();
    assert_eq!(matches.len(), 1);
    assert_eq!(matches[0].text, "target(2)");
    let binding = &matches[0].bindings["FN"];
    let resolved = binding.metadata.resolved.as_ref().unwrap();
    assert_eq!(resolved.name, "target");
    assert_eq!(resolved.kind, "fn");
    // The constraint's `line = 1` corresponds to zero-based row 0 of the span.
    assert_eq!(resolved.span.start_row, 0);
    assert_eq!(binding.metadata.ty.as_deref(), Some("fn(int) -> int"));
}
721
/// A `type` constraint keeps only the matches whose captured value has the
/// requested type: `log(count)` survives, `log(label)` is filtered out.
#[test]
fn harn_capture_type_constraint_filters_matches() {
    let compiled = rule(
        r#"
            id = "int-logs"
            language = "harn"
            [rule]
            pattern = "log($VALUE)"

            [[where]]
            metavar = "VALUE"
            type = "int"
            "#,
    );
    let source = r#"fn main() {
    let count: int = 1
    let label: string = "one"
    log(count)
    log(label)
}
"#;
    let matches = compiled.run(source).unwrap();
    assert_eq!(matches.len(), 1);
    let value = &matches[0].bindings["VALUE"];
    assert_eq!(value.text, "count");
    assert_eq!(value.metadata.ty.as_deref(), Some("int"));
    assert_eq!(
        value
            .metadata
            .resolved
            .as_ref()
            .map(|resolved| resolved.kind.as_str()),
        Some("let")
    );
}
757
/// Inside the initializer of an inner `let value`, the name `value` still
/// resolves to the outer `let value` declared on source line 2.
#[test]
fn harn_initializer_uses_outer_binding_scope() {
    let compiled = rule(
        r#"
            id = "outer-initializer"
            language = "harn"
            [rule]
            pattern = "log($VALUE)"

            [[where]]
            metavar = "VALUE"
            resolvesTo = { name = "value", kind = "let", line = 2 }
            "#,
    );
    let source = r"fn main() {
    let value: int = 1
    if true {
        let value: string = log(value)
    }
}
";
    let matches = compiled.run(source).unwrap();
    assert_eq!(matches.len(), 1);
    let value = &matches[0].bindings["VALUE"];
    assert_eq!(value.text, "value");
    // Zero-based row 1 is the outer declaration, not the inner one on row 3.
    assert_eq!(
        value
            .metadata
            .resolved
            .as_ref()
            .map(|resolved| resolved.span.start_row),
        Some(1)
    );
}
792}