// codemod_core/rule/schema.rs

//! Rule schema definitions for serialization / deserialization.
//!
//! The schema follows a simple YAML structure:
//!
//! ```yaml
//! name: replace-println
//! description: Replace println! with log::info!
//! language: rust
//! version: "1.0"
//! pattern:
//!   before: "println!($fmt, $args)"
//!   after: "log::info!($fmt, $args)"
//! config:
//!   include:
//!     - "src/**/*.rs"
//!   exclude:
//!     - "tests/**"
//!   respect_gitignore: true
//!   max_file_size: 500000
//! ```
21
22use serde::{Deserialize, Serialize};
23
24use crate::error::CodemodError;
25
/// A complete codemod rule, suitable for serialization to/from YAML.
///
/// When deserializing, `name`, `description`, `language` and `pattern` must
/// be present; `version` and `config` fall back to defaults when omitted.
/// Deserialization alone does not check the contents of the fields — call
/// [`CodemodRule::validate`] for that.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodemodRule {
    /// Unique rule name (kebab-case recommended).
    pub name: String,
    /// Human-readable description of what the rule does.
    pub description: String,
    /// Target programming language (e.g. `"rust"`, `"javascript"`).
    pub language: String,
    /// Version string of the rule; `"1.0"` is used when the key is absent.
    /// The format is not checked anywhere in this file.
    #[serde(default = "default_version")]
    pub version: String,
    /// The before/after transformation pattern.
    pub pattern: RulePattern,
    /// Optional scanning configuration; [`RuleConfig::default`] is used when
    /// the `config` key is absent.
    #[serde(default)]
    pub config: RuleConfig,
}
44
/// Fallback value for a rule's `version` field when the key is omitted.
///
/// Exists as a function because `#[serde(default = "...")]` takes the path of
/// a function to call, not a literal value.
fn default_version() -> String {
    String::from("1.0")
}
49
/// The before/after pattern inside a rule.
///
/// Both fields are required when deserializing. Templates may contain
/// `$name` placeholders, as in the module-level YAML example.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RulePattern {
    /// Source pattern (what to look for).
    pub before: String,
    /// Replacement pattern (what to replace it with).
    pub after: String,
}
58
/// Scanning / filtering configuration embedded in a rule.
///
/// Every field is optional in the YAML; omitted fields take the same values
/// as [`RuleConfig::default`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleConfig {
    /// Glob patterns for files to include. Empty when omitted.
    // NOTE(review): how an empty list is interpreted (e.g. "match everything")
    // is decided by the scanner, which is not visible in this file.
    #[serde(default)]
    pub include: Vec<String>,
    /// Glob patterns for files to exclude. Empty when omitted.
    #[serde(default)]
    pub exclude: Vec<String>,
    /// Whether to respect `.gitignore` during scanning. `true` when omitted.
    #[serde(default = "default_true")]
    pub respect_gitignore: bool,
    /// Optional maximum file size (in bytes) for scanning. `None` when
    /// omitted.
    #[serde(default)]
    pub max_file_size: Option<usize>,
}
75
76impl Default for RuleConfig {
77    fn default() -> Self {
78        Self {
79            include: Vec::new(),
80            exclude: Vec::new(),
81            respect_gitignore: true,
82            max_file_size: None,
83        }
84    }
85}
86
/// Serde default provider for boolean fields whose absent value is `true`.
///
/// `#[serde(default)]` on a `bool` would yield `false`; fields that need the
/// opposite reference this function by path instead.
fn default_true() -> bool {
    true
}
91
92impl CodemodRule {
93    /// Validate the rule, checking for missing or inconsistent fields.
94    ///
95    /// # Errors
96    ///
97    /// Returns [`CodemodError::Rule`] if validation fails.
98    pub fn validate(&self) -> crate::Result<()> {
99        if self.name.trim().is_empty() {
100            return Err(CodemodError::Rule("Rule name must not be empty".into()));
101        }
102        if self.language.trim().is_empty() {
103            return Err(CodemodError::Rule("Rule language must not be empty".into()));
104        }
105        if self.pattern.before.trim().is_empty() {
106            return Err(CodemodError::Rule(
107                "Rule pattern.before must not be empty".into(),
108            ));
109        }
110        if self.pattern.after.trim().is_empty() {
111            return Err(CodemodError::Rule(
112                "Rule pattern.after must not be empty".into(),
113            ));
114        }
115        if self.pattern.before == self.pattern.after {
116            return Err(CodemodError::Rule(
117                "Rule pattern.before and pattern.after must not be identical".into(),
118            ));
119        }
120        Ok(())
121    }
122
123    /// Convert this rule into a [`Pattern`](crate::pattern::Pattern) by
124    /// parsing the before/after templates and extracting variables.
125    ///
126    /// Variables are detected by the `$name` syntax in the templates.
127    pub fn to_pattern(&self) -> crate::pattern::Pattern {
128        let variables = Self::extract_variables(&self.pattern.before, &self.pattern.after);
129        crate::pattern::Pattern::new(
130            self.pattern.before.clone(),
131            self.pattern.after.clone(),
132            variables,
133            self.language.clone(),
134            1.0, // user-defined rules get maximum confidence
135        )
136    }
137
138    /// Extract pattern variables from the before and after templates.
139    ///
140    /// A variable is any token matching `$[a-zA-Z_][a-zA-Z0-9_]*`.
141    fn extract_variables(before: &str, after: &str) -> Vec<crate::pattern::PatternVar> {
142        let mut seen = std::collections::HashSet::new();
143        let mut vars = Vec::new();
144
145        for template in &[before, after] {
146            let mut chars = template.chars().peekable();
147            while let Some(ch) = chars.next() {
148                if ch == '$' {
149                    let mut name = String::from('$');
150                    while let Some(&next) = chars.peek() {
151                        if next.is_alphanumeric() || next == '_' {
152                            name.push(next);
153                            chars.next();
154                        } else {
155                            break;
156                        }
157                    }
158                    if name.len() > 1 && seen.insert(name.clone()) {
159                        vars.push(crate::pattern::PatternVar {
160                            name,
161                            node_type: None,
162                        });
163                    }
164                }
165            }
166        }
167
168        vars
169    }
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a Rust-language rule with the given name and templates. The
    /// remaining fields use fixed values that no test inspects.
    fn rust_rule(name: &str, before: &str, after: &str) -> CodemodRule {
        CodemodRule {
            name: name.to_string(),
            description: "desc".to_string(),
            language: "rust".to_string(),
            version: "1.0".to_string(),
            pattern: RulePattern {
                before: before.to_string(),
                after: after.to_string(),
            },
            config: RuleConfig::default(),
        }
    }

    // A fully populated rule with distinct templates passes validation.
    #[test]
    fn test_validate_valid_rule() {
        assert!(rust_rule("test", "old()", "new()").validate().is_ok());
    }

    // An empty name is rejected.
    #[test]
    fn test_validate_empty_name() {
        assert!(rust_rule("", "old()", "new()").validate().is_err());
    }

    // A rule whose before and after templates are equal is rejected.
    #[test]
    fn test_validate_identical_patterns() {
        assert!(rust_rule("test", "same()", "same()").validate().is_err());
    }

    // Variables come back in order of first occurrence, `$` included.
    #[test]
    fn test_extract_variables() {
        let vars = CodemodRule::extract_variables("foo($arg1, $arg2)", "bar($arg1, $arg2)");
        let names: Vec<&str> = vars.iter().map(|var| var.name.as_str()).collect();
        assert_eq!(names, ["$arg1", "$arg2"]);
    }

    // A variable repeated within and across templates is reported once.
    #[test]
    fn test_extract_variables_dedup() {
        let vars = CodemodRule::extract_variables("f($x, $x)", "g($x)");
        assert_eq!(vars.len(), 1);
    }

    // The pattern carries the rule's language, its variables and full confidence.
    #[test]
    fn test_to_pattern() {
        let pattern = rust_rule("test", "old($x)", "new($x)").to_pattern();
        assert_eq!(pattern.language, "rust");
        assert_eq!(pattern.variables.len(), 1);
        assert_eq!(pattern.confidence, 1.0);
    }

    // The default configuration has no globs, respects `.gitignore`, no size cap.
    #[test]
    fn test_default_config() {
        let RuleConfig {
            include,
            exclude,
            respect_gitignore,
            max_file_size,
        } = RuleConfig::default();
        assert!(include.is_empty());
        assert!(exclude.is_empty());
        assert!(respect_gitignore);
        assert!(max_file_size.is_none());
    }

    // Serializing to YAML and parsing it back preserves the inspected fields.
    #[test]
    fn test_yaml_roundtrip() {
        let original = CodemodRule {
            name: "yaml-test".into(),
            description: "Round-trip test".into(),
            language: "javascript".into(),
            version: "2.0".into(),
            pattern: RulePattern {
                before: "require($mod)".into(),
                after: "import $mod".into(),
            },
            config: RuleConfig {
                include: vec!["src/**/*.js".into()],
                exclude: vec!["dist/**".into()],
                respect_gitignore: true,
                max_file_size: Some(500_000),
            },
        };

        let text = serde_yaml::to_string(&original).unwrap();
        let restored: CodemodRule = serde_yaml::from_str(&text).unwrap();

        assert_eq!(restored.name, "yaml-test");
        assert_eq!(restored.config.include, vec!["src/**/*.js"]);
        assert_eq!(restored.config.max_file_size, Some(500_000));
    }
}