codemod_core/pattern/mod.rs
1//! Pattern inference, matching, and validation.
2//!
3//! This module contains the core algorithms for:
4//!
5//! - **Inference** ([`inferrer`]): Deriving transformation patterns from
6//! before/after example pairs by performing a structural diff of their ASTs.
7//! - **Matching** ([`matcher`]): Finding occurrences of a pattern in arbitrary
8//! source code by comparing AST sub-trees.
9//! - **Validation** ([`validator`]): Checking that an inferred pattern is
10//! well-formed and likely to produce correct transformations.
11
12pub mod inferrer;
13pub mod matcher;
14pub mod validator;
15
16pub use inferrer::PatternInferrer;
17pub use matcher::PatternMatcher;
18pub use validator::PatternValidator;
19
20use serde::{Deserialize, Serialize};
21
22/// Represents a pattern variable that matches any expression or identifier
23/// at a particular position in the AST.
24///
25/// During inference, differing leaf nodes (identifiers, literals) between the
26/// before and after examples are extracted as pattern variables. Each variable
27/// has a unique name (e.g. `$id`, `$expr1`) and an optional constraint on the
28/// tree-sitter node type it must match.
29#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
30pub struct PatternVar {
31 /// Variable name (e.g. `"$id"`, `"$expr"`).
32 pub name: String,
33 /// Optional constraint on the tree-sitter node type this variable must
34 /// match (e.g. `"identifier"`, `"string_literal"`).
35 pub node_type: Option<String>,
36}
37
38/// Represents an inferred transformation pattern.
39///
40/// A `Pattern` is the central data structure of the codemod engine. It captures
41/// *what* to look for in source code (`before_template`) and *what* to replace
42/// it with (`after_template`), using [`PatternVar`]s as placeholders for
43/// varying sub-expressions.
44///
45/// ## Template syntax
46///
47/// Variables are written as `$name` inside the template strings. For example:
48///
49/// ```text
50/// before: "println!($fmt, $arg)"
51/// after: "log::info!($fmt, $arg)"
52/// ```
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct Pattern {
55 /// The "before" pattern template with `$variable` placeholders.
56 pub before_template: String,
57 /// The "after" pattern template with `$variable` placeholders.
58 pub after_template: String,
59 /// Variables extracted during inference.
60 pub variables: Vec<PatternVar>,
61 /// The source language identifier (e.g. `"rust"`, `"javascript"`).
62 pub language: String,
63 /// Confidence score of the inference in the range `[0.0, 1.0]`.
64 ///
65 /// Higher values indicate that the inferred pattern is more likely to be
66 /// correct and generalizable.
67 pub confidence: f64,
68}
69
70impl Pattern {
71 /// Creates a new pattern with the given templates and variables.
72 pub fn new(
73 before_template: String,
74 after_template: String,
75 variables: Vec<PatternVar>,
76 language: String,
77 confidence: f64,
78 ) -> Self {
79 Self {
80 before_template,
81 after_template,
82 variables,
83 language,
84 confidence,
85 }
86 }
87
88 /// Returns `true` if this pattern contains at least one variable.
89 pub fn has_variables(&self) -> bool {
90 !self.variables.is_empty()
91 }
92
93 /// Returns `true` if the confidence score meets the given threshold.
94 pub fn meets_confidence(&self, threshold: f64) -> bool {
95 self.confidence >= threshold
96 }
97}