Skip to main content

lang_check/
style_rules.rs

1use anyhow::Result;
2use serde::Deserialize;
3use std::path::Path;
4
5use crate::checker::{Diagnostic, Severity};
6
7/// A declarative style rule loaded from YAML, inspired by Vale.
8#[derive(Debug, Deserialize, Clone)]
9pub struct StyleRule {
10    /// Unique rule identifier (e.g. "custom.no-passive-voice").
11    pub id: String,
12    /// Human-readable message shown to the user.
13    pub message: String,
14    /// Severity level: "error", "warning", "info", "hint".
15    #[serde(default = "default_severity")]
16    pub severity: String,
17    /// The match pattern type.
18    #[serde(flatten)]
19    pub pattern: PatternType,
20    /// Optional replacement suggestion.
21    pub suggestion: Option<String>,
22}
23
24#[derive(Debug, Deserialize, Clone)]
25#[serde(tag = "type")]
26pub enum PatternType {
27    /// Match exact words/phrases (case-insensitive by default).
28    #[serde(rename = "existence")]
29    Existence {
30        tokens: Vec<String>,
31        #[serde(default)]
32        ignorecase: bool,
33    },
34    /// Match a regex pattern.
35    #[serde(rename = "pattern")]
36    Pattern { regex: String },
37    /// Match one token and suggest substitution with another.
38    #[serde(rename = "substitution")]
39    Substitution {
40        swap: std::collections::HashMap<String, String>,
41        #[serde(default)]
42        ignorecase: bool,
43    },
44}
45
/// Severity assigned to a rule whose YAML entry has no `severity` key.
fn default_severity() -> String {
    String::from("warning")
}
49
50/// Engine that applies declarative style rules to prose text.
51pub struct StyleRuleEngine {
52    rules: Vec<StyleRule>,
53}
54
55impl Default for StyleRuleEngine {
56    fn default() -> Self {
57        Self::new()
58    }
59}
60
61impl StyleRuleEngine {
62    #[must_use]
63    pub const fn new() -> Self {
64        Self { rules: Vec::new() }
65    }
66
67    /// Load rules from a YAML file.
68    pub fn load_file(&mut self, path: &Path) -> Result<usize> {
69        let content = std::fs::read_to_string(path)?;
70        self.load_yaml(&content)
71    }
72
73    /// Load rules from a YAML string.
74    pub fn load_yaml(&mut self, yaml: &str) -> Result<usize> {
75        let rules: Vec<StyleRule> = serde_yaml::from_str(yaml)?;
76        let count = rules.len();
77        self.rules.extend(rules);
78        Ok(count)
79    }
80
81    /// Load all `.yaml`/`.yml` files from a directory.
82    pub fn load_dir(&mut self, dir: &Path) -> Result<usize> {
83        let mut total = 0;
84        if !dir.exists() {
85            return Ok(0);
86        }
87        for entry in std::fs::read_dir(dir)? {
88            let entry = entry?;
89            let path = entry.path();
90            if let Some(ext) = path.extension().and_then(|e| e.to_str())
91                && (ext == "yaml" || ext == "yml")
92            {
93                total += self.load_file(&path)?;
94            }
95        }
96        Ok(total)
97    }
98
99    /// Number of loaded rules.
100    #[must_use]
101    pub const fn rule_count(&self) -> usize {
102        self.rules.len()
103    }
104
105    /// Check prose text against all loaded rules.
106    #[must_use]
107    pub fn check(&self, text: &str) -> Vec<Diagnostic> {
108        let mut diagnostics = Vec::new();
109
110        for rule in &self.rules {
111            match &rule.pattern {
112                PatternType::Existence { tokens, ignorecase } => {
113                    for token in tokens {
114                        Self::find_token_matches(text, token, *ignorecase, rule, &mut diagnostics);
115                    }
116                }
117                PatternType::Pattern { regex } => {
118                    if let Ok(re) = regex::Regex::new(regex) {
119                        for m in re.find_iter(text) {
120                            let suggestions = rule
121                                .suggestion
122                                .as_ref()
123                                .map_or_else(Vec::new, |s| vec![s.clone()]);
124                            diagnostics.push(Self::make_diagnostic(
125                                rule,
126                                m.start(),
127                                m.end(),
128                                suggestions,
129                            ));
130                        }
131                    }
132                }
133                PatternType::Substitution { swap, ignorecase } => {
134                    for (from, to) in swap {
135                        Self::find_token_matches_with_suggestion(
136                            text,
137                            from,
138                            *ignorecase,
139                            rule,
140                            to,
141                            &mut diagnostics,
142                        );
143                    }
144                }
145            }
146        }
147
148        diagnostics
149    }
150
151    fn find_token_matches(
152        text: &str,
153        token: &str,
154        ignorecase: bool,
155        rule: &StyleRule,
156        diagnostics: &mut Vec<Diagnostic>,
157    ) {
158        Self::find_token_matches_with_suggestion(
159            text,
160            token,
161            ignorecase,
162            rule,
163            rule.suggestion.as_deref().unwrap_or_default(),
164            diagnostics,
165        );
166    }
167
168    fn find_token_matches_with_suggestion(
169        text: &str,
170        token: &str,
171        ignorecase: bool,
172        rule: &StyleRule,
173        suggestion: &str,
174        diagnostics: &mut Vec<Diagnostic>,
175    ) {
176        let search_text = if ignorecase {
177            text.to_lowercase()
178        } else {
179            text.to_string()
180        };
181        let search_token = if ignorecase {
182            token.to_lowercase()
183        } else {
184            token.to_string()
185        };
186
187        let mut start = 0;
188        while let Some(pos) = search_text[start..].find(&search_token) {
189            let abs_pos = start + pos;
190            let end_pos = abs_pos + token.len();
191
192            // Ensure word boundary match (not part of a larger word)
193            let at_word_start =
194                abs_pos == 0 || !text.as_bytes()[abs_pos - 1].is_ascii_alphanumeric();
195            let at_word_end = end_pos >= text.len()
196                || !text.as_bytes()[end_pos.min(text.len() - 1)].is_ascii_alphanumeric();
197
198            if at_word_start && at_word_end {
199                let suggestions = if suggestion.is_empty() {
200                    vec![]
201                } else {
202                    vec![suggestion.to_string()]
203                };
204                diagnostics.push(Self::make_diagnostic(rule, abs_pos, end_pos, suggestions));
205            }
206
207            start = abs_pos + 1;
208        }
209    }
210
211    fn make_diagnostic(
212        rule: &StyleRule,
213        start: usize,
214        end: usize,
215        suggestions: Vec<String>,
216    ) -> Diagnostic {
217        let severity = match rule.severity.as_str() {
218            "error" => Severity::Error as i32,
219            "info" => Severity::Information as i32,
220            "hint" => Severity::Hint as i32,
221            _ => Severity::Warning as i32,
222        };
223
224        Diagnostic {
225            #[allow(clippy::cast_possible_truncation)]
226            start_byte: start as u32,
227            #[allow(clippy::cast_possible_truncation)]
228            end_byte: end as u32,
229            message: rule.message.clone(),
230            suggestions,
231            rule_id: rule.id.clone(),
232            severity,
233            unified_id: format!("style.custom.{}", rule.id),
234            confidence: 0.9,
235        }
236    }
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Existence rule flagging three jargon words, case-insensitively.
    const EXISTENCE_YAML: &str = r#"
- id: no-jargon
  message: "Avoid jargon"
  severity: warning
  type: existence
  tokens:
    - leverage
    - synergy
    - paradigm
  ignorecase: true
"#;

    /// Substitution rule that expands three contractions, case-sensitively.
    const SUBSTITUTION_YAML: &str = r#"
- id: contractions
  message: "Use the expanded form"
  severity: info
  type: substitution
  swap:
    "don't": "do not"
    "can't": "cannot"
    "won't": "will not"
  ignorecase: false
"#;

    /// Regex rule flagging simple passive constructions.
    const PATTERN_YAML: &str = r#"
- id: no-passive
  message: "Avoid passive voice"
  severity: warning
  type: pattern
  regex: '\b(was|were|been|being)\s+\w+ed\b'
"#;

    /// Builds an engine preloaded with every YAML document in `sources`, in order.
    fn engine_with(sources: &[&str]) -> StyleRuleEngine {
        let mut engine = StyleRuleEngine::new();
        for yaml in sources {
            engine.load_yaml(yaml).unwrap();
        }
        engine
    }

    /// Runs `text` through an engine loaded with the single document `yaml`.
    fn check_with(yaml: &str, text: &str) -> Vec<Diagnostic> {
        engine_with(&[yaml]).check(text)
    }

    #[test]
    fn load_existence_rules() {
        let mut engine = StyleRuleEngine::new();
        let loaded = engine.load_yaml(EXISTENCE_YAML).unwrap();
        assert_eq!(loaded, 1);
        assert_eq!(engine.rule_count(), 1);
    }

    #[test]
    fn existence_match() {
        let found = check_with(EXISTENCE_YAML, "We should leverage our synergy.");
        assert_eq!(found.len(), 2);
        assert!(found.iter().any(|d| d.rule_id == "no-jargon"));
    }

    #[test]
    fn existence_ignorecase() {
        let found = check_with(EXISTENCE_YAML, "LEVERAGE the Paradigm.");
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn existence_word_boundary() {
        // "leveraged" must not be reported for the token "leverage".
        let found = check_with(EXISTENCE_YAML, "They leveraged their position.");
        assert_eq!(found.len(), 0);
    }

    #[test]
    fn substitution_match() {
        let found = check_with(SUBSTITUTION_YAML, "You don't need to worry.");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].suggestions, vec!["do not"]);
    }

    #[test]
    fn pattern_match() {
        let found = check_with(PATTERN_YAML, "The ball was kicked by the player.");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].rule_id, "no-passive");
    }

    #[test]
    fn no_matches_on_clean_text() {
        let found = check_with(
            EXISTENCE_YAML,
            "The quick brown fox jumped over the lazy dog.",
        );
        assert!(found.is_empty());
    }

    #[test]
    fn multiple_rule_files() {
        let engine = engine_with(&[EXISTENCE_YAML, SUBSTITUTION_YAML, PATTERN_YAML]);
        assert_eq!(engine.rule_count(), 3);
    }
}
343}