1use anyhow::Result;
2use serde::Deserialize;
3use std::path::Path;
4
5use crate::checker::{Diagnostic, Severity};
6
7#[derive(Debug, Deserialize, Clone)]
9pub struct StyleRule {
10 pub id: String,
12 pub message: String,
14 #[serde(default = "default_severity")]
16 pub severity: String,
17 #[serde(flatten)]
19 pub pattern: PatternType,
20 pub suggestion: Option<String>,
22}
23
24#[derive(Debug, Deserialize, Clone)]
25#[serde(tag = "type")]
26pub enum PatternType {
27 #[serde(rename = "existence")]
29 Existence {
30 tokens: Vec<String>,
31 #[serde(default)]
32 ignorecase: bool,
33 },
34 #[serde(rename = "pattern")]
36 Pattern { regex: String },
37 #[serde(rename = "substitution")]
39 Substitution {
40 swap: std::collections::HashMap<String, String>,
41 #[serde(default)]
42 ignorecase: bool,
43 },
44}
45
/// Severity assigned to a rule whose YAML omits the `severity` key.
fn default_severity() -> String {
    String::from("warning")
}
49
50pub struct StyleRuleEngine {
52 rules: Vec<StyleRule>,
53}
54
55impl Default for StyleRuleEngine {
56 fn default() -> Self {
57 Self::new()
58 }
59}
60
61impl StyleRuleEngine {
62 #[must_use]
63 pub const fn new() -> Self {
64 Self { rules: Vec::new() }
65 }
66
67 pub fn load_file(&mut self, path: &Path) -> Result<usize> {
69 let content = std::fs::read_to_string(path)?;
70 self.load_yaml(&content)
71 }
72
73 pub fn load_yaml(&mut self, yaml: &str) -> Result<usize> {
75 let rules: Vec<StyleRule> = serde_yaml::from_str(yaml)?;
76 let count = rules.len();
77 self.rules.extend(rules);
78 Ok(count)
79 }
80
81 pub fn load_dir(&mut self, dir: &Path) -> Result<usize> {
83 let mut total = 0;
84 if !dir.exists() {
85 return Ok(0);
86 }
87 for entry in std::fs::read_dir(dir)? {
88 let entry = entry?;
89 let path = entry.path();
90 if let Some(ext) = path.extension().and_then(|e| e.to_str())
91 && (ext == "yaml" || ext == "yml")
92 {
93 total += self.load_file(&path)?;
94 }
95 }
96 Ok(total)
97 }
98
99 #[must_use]
101 pub const fn rule_count(&self) -> usize {
102 self.rules.len()
103 }
104
105 #[must_use]
107 pub fn check(&self, text: &str) -> Vec<Diagnostic> {
108 let mut diagnostics = Vec::new();
109
110 for rule in &self.rules {
111 match &rule.pattern {
112 PatternType::Existence { tokens, ignorecase } => {
113 for token in tokens {
114 Self::find_token_matches(text, token, *ignorecase, rule, &mut diagnostics);
115 }
116 }
117 PatternType::Pattern { regex } => {
118 if let Ok(re) = regex::Regex::new(regex) {
119 for m in re.find_iter(text) {
120 let suggestions = rule
121 .suggestion
122 .as_ref()
123 .map_or_else(Vec::new, |s| vec![s.clone()]);
124 diagnostics.push(Self::make_diagnostic(
125 rule,
126 m.start(),
127 m.end(),
128 suggestions,
129 ));
130 }
131 }
132 }
133 PatternType::Substitution { swap, ignorecase } => {
134 for (from, to) in swap {
135 Self::find_token_matches_with_suggestion(
136 text,
137 from,
138 *ignorecase,
139 rule,
140 to,
141 &mut diagnostics,
142 );
143 }
144 }
145 }
146 }
147
148 diagnostics
149 }
150
151 fn find_token_matches(
152 text: &str,
153 token: &str,
154 ignorecase: bool,
155 rule: &StyleRule,
156 diagnostics: &mut Vec<Diagnostic>,
157 ) {
158 Self::find_token_matches_with_suggestion(
159 text,
160 token,
161 ignorecase,
162 rule,
163 rule.suggestion.as_deref().unwrap_or_default(),
164 diagnostics,
165 );
166 }
167
168 fn find_token_matches_with_suggestion(
169 text: &str,
170 token: &str,
171 ignorecase: bool,
172 rule: &StyleRule,
173 suggestion: &str,
174 diagnostics: &mut Vec<Diagnostic>,
175 ) {
176 let search_text = if ignorecase {
177 text.to_lowercase()
178 } else {
179 text.to_string()
180 };
181 let search_token = if ignorecase {
182 token.to_lowercase()
183 } else {
184 token.to_string()
185 };
186
187 let mut start = 0;
188 while let Some(pos) = search_text[start..].find(&search_token) {
189 let abs_pos = start + pos;
190 let end_pos = abs_pos + token.len();
191
192 let at_word_start =
194 abs_pos == 0 || !text.as_bytes()[abs_pos - 1].is_ascii_alphanumeric();
195 let at_word_end = end_pos >= text.len()
196 || !text.as_bytes()[end_pos.min(text.len() - 1)].is_ascii_alphanumeric();
197
198 if at_word_start && at_word_end {
199 let suggestions = if suggestion.is_empty() {
200 vec![]
201 } else {
202 vec![suggestion.to_string()]
203 };
204 diagnostics.push(Self::make_diagnostic(rule, abs_pos, end_pos, suggestions));
205 }
206
207 start = abs_pos + 1;
208 }
209 }
210
211 fn make_diagnostic(
212 rule: &StyleRule,
213 start: usize,
214 end: usize,
215 suggestions: Vec<String>,
216 ) -> Diagnostic {
217 let severity = match rule.severity.as_str() {
218 "error" => Severity::Error as i32,
219 "info" => Severity::Information as i32,
220 "hint" => Severity::Hint as i32,
221 _ => Severity::Warning as i32,
222 };
223
224 Diagnostic {
225 #[allow(clippy::cast_possible_truncation)]
226 start_byte: start as u32,
227 #[allow(clippy::cast_possible_truncation)]
228 end_byte: end as u32,
229 message: rule.message.clone(),
230 suggestions,
231 rule_id: rule.id.clone(),
232 severity,
233 unified_id: format!("style.custom.{}", rule.id),
234 confidence: 0.9,
235 }
236 }
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Existence rule flagging three jargon words, case-insensitively.
    const EXISTENCE_YAML: &str = r#"
- id: no-jargon
  message: "Avoid jargon"
  severity: warning
  type: existence
  tokens:
    - leverage
    - synergy
    - paradigm
  ignorecase: true
"#;

    /// Substitution rule expanding three contractions, case-sensitively.
    const SUBSTITUTION_YAML: &str = r#"
- id: contractions
  message: "Use the expanded form"
  severity: info
  type: substitution
  swap:
    "don't": "do not"
    "can't": "cannot"
    "won't": "will not"
  ignorecase: false
"#;

    /// Regex rule matching a simple passive-voice construction.
    const PATTERN_YAML: &str = r#"
- id: no-passive
  message: "Avoid passive voice"
  severity: warning
  type: pattern
  regex: '\b(was|were|been|being)\s+\w+ed\b'
"#;

    /// Builds an engine with each of the given YAML documents loaded in order.
    fn engine_with(sources: &[&str]) -> StyleRuleEngine {
        let mut engine = StyleRuleEngine::new();
        for yaml in sources {
            engine.load_yaml(yaml).unwrap();
        }
        engine
    }

    #[test]
    fn load_existence_rules() {
        let mut engine = StyleRuleEngine::new();
        let loaded = engine.load_yaml(EXISTENCE_YAML).unwrap();
        assert_eq!(loaded, 1);
        assert_eq!(engine.rule_count(), 1);
    }

    #[test]
    fn existence_match() {
        let found = engine_with(&[EXISTENCE_YAML]).check("We should leverage our synergy.");
        assert_eq!(found.len(), 2);
        assert!(found.iter().any(|d| d.rule_id == "no-jargon"));
    }

    #[test]
    fn existence_ignorecase() {
        let found = engine_with(&[EXISTENCE_YAML]).check("LEVERAGE the Paradigm.");
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn existence_word_boundary() {
        let found = engine_with(&[EXISTENCE_YAML]).check("They leveraged their position.");
        // "leveraged" merely contains the token; it is not a whole-word occurrence.
        assert_eq!(found.len(), 0);
    }

    #[test]
    fn substitution_match() {
        let found = engine_with(&[SUBSTITUTION_YAML]).check("You don't need to worry.");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].suggestions, vec!["do not"]);
    }

    #[test]
    fn pattern_match() {
        let found = engine_with(&[PATTERN_YAML]).check("The ball was kicked by the player.");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].rule_id, "no-passive");
    }

    #[test]
    fn no_matches_on_clean_text() {
        let found = engine_with(&[EXISTENCE_YAML])
            .check("The quick brown fox jumped over the lazy dog.");
        assert!(found.is_empty());
    }

    #[test]
    fn multiple_rule_files() {
        let engine = engine_with(&[EXISTENCE_YAML, SUBSTITUTION_YAML, PATTERN_YAML]);
        assert_eq!(engine.rule_count(), 3);
    }
}
343}