1use regex::Regex;
2use serde::Deserialize;
3use std::collections::HashSet;
4use std::rc::Rc;
5
6use crate::{
7 linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation},
8 rules::{Rule, RuleType},
9};
10
11#[derive(Debug, PartialEq, Clone, Deserialize)]
13pub struct MD044ProperNamesTable {
14 #[serde(default)]
15 pub names: Vec<String>,
16 #[serde(default)]
17 pub code_blocks: bool,
18 #[serde(default)]
19 pub html_elements: bool,
20}
21
22impl Default for MD044ProperNamesTable {
23 fn default() -> Self {
24 Self {
25 names: Vec::new(),
26 code_blocks: true,
27 html_elements: true,
28 }
29 }
30}
31
32pub(crate) struct MD044Linter {
33 context: Rc<Context>,
34 violations: Vec<RuleViolation>,
35 name_regexes: Vec<(String, Regex)>, all_names: HashSet<String>, }
38
39impl MD044Linter {
40 pub fn new(context: Rc<Context>) -> Self {
41 let config = &context.config.linters.settings.proper_names;
42 let mut name_regexes = Vec::new();
43
44 let all_names: HashSet<String> = config.names.iter().cloned().collect();
46
47 let mut names = config.names.clone();
49 names.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));
50
51 for name in names {
52 if !name.is_empty() {
53 if let Ok(regex) = create_name_regex(&name) {
55 name_regexes.push((name, regex));
56 }
57 }
58 }
59
60 Self {
61 context,
62 violations: Vec::new(),
63 name_regexes,
64 all_names,
65 }
66 }
67
68 fn should_check_node(&self, node_kind: &str) -> bool {
69 let config = &self.context.config.linters.settings.proper_names;
70
71 match node_kind {
72 "fenced_code_block" | "indented_code_block" | "code_span" => config.code_blocks,
74 "html_block" | "html_inline" => config.html_elements,
76 "text" | "paragraph" => true,
78 _ => false,
79 }
80 }
81
82 fn check_text_content(
85 &self,
86 text: &str,
87 start_line: usize,
88 start_column: usize,
89 ) -> Vec<RuleViolation> {
90 if self.name_regexes.is_empty() {
91 return Vec::new();
92 }
93
94 let mut violations = Vec::new();
95 let mut exclusion_ranges: Vec<(usize, usize)> = Vec::new(); for (expected_name, regex) in &self.name_regexes {
98 for match_result in regex.find_iter(text) {
99 let matched_text = match_result.as_str();
100 let match_start = match_result.start();
101 let match_end = match_result.end();
102
103 let overlaps = exclusion_ranges
105 .iter()
106 .any(|(start, end)| !(match_end <= *start || match_start >= *end));
107
108 if overlaps {
109 continue;
110 }
111
112 if self.all_names.contains(matched_text) {
114 exclusion_ranges.push((match_start, match_end));
116 continue;
117 }
118
119 let range = tree_sitter::Range {
121 start_byte: match_start,
122 end_byte: match_end,
123 start_point: tree_sitter::Point {
124 row: start_line,
125 column: start_column + match_start,
126 },
127 end_point: tree_sitter::Point {
128 row: start_line,
129 column: start_column + match_end,
130 },
131 };
132
133 violations.push(RuleViolation::new(
134 &MD044,
135 format!("Expected: {expected_name}; Actual: {matched_text}"),
136 self.context.file_path.clone(),
137 range_from_tree_sitter(&range),
138 ));
139
140 exclusion_ranges.push((match_start, match_end));
142 }
143 }
144 violations
145 }
146}
147
148impl RuleLinter for MD044Linter {
149 fn feed(&mut self, node: &tree_sitter::Node) {
150 if !self.should_check_node(node.kind()) {
151 return;
152 }
153
154 let source = self.context.get_document_content();
155 let start_byte = node.start_byte();
156 let end_byte = node.end_byte();
157
158 if end_byte <= source.len() {
159 let text_slice = &source[start_byte..end_byte];
163 let start_line = node.start_position().row;
164 let start_column = node.start_position().column;
165
166 let new_violations = self.check_text_content(text_slice, start_line, start_column);
167 self.violations.extend(new_violations);
168 }
169 }
170
171 fn finalize(&mut self) -> Vec<RuleViolation> {
172 std::mem::take(&mut self.violations)
173 }
174}
175
176fn create_name_regex(name: &str) -> Result<Regex, regex::Error> {
178 let escaped_name = regex::escape(name);
179
180 let starts_with_word_char = name.chars().next().is_some_and(is_word_char);
183 let ends_with_word_char = name.chars().last().is_some_and(is_word_char);
184
185 let start_boundary = if starts_with_word_char { "\\b_*" } else { "" };
186 let end_boundary = if ends_with_word_char { "_*\\b" } else { "" };
187
188 let pattern = format!("(?i){start_boundary}{escaped_name}{end_boundary}");
190 Regex::new(&pattern)
191}
192
/// Returns `true` for an underscore or any character `char::is_alphanumeric` accepts.
fn is_word_char(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
197
198pub const MD044: Rule = Rule {
199 id: "MD044",
200 alias: "proper-names",
201 tags: &["spelling"],
202 description: "Proper names should have the correct capitalization",
203 rule_type: RuleType::Token, required_nodes: &[
205 "text",
206 "paragraph",
207 "fenced_code_block",
208 "indented_code_block",
209 "code_span",
210 "html_block",
211 "html_inline",
212 ],
213 new_linter: |context| Box::new(MD044Linter::new(context)),
214};
215
#[cfg(test)]
mod test {
    use crate::config::{LintersSettingsTable, MD044ProperNamesTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;
    use std::path::PathBuf;

    /// Builds a configuration in which only `proper-names` is enabled (as an error),
    /// using the given rule settings.
    fn test_config(
        names: Vec<String>,
        code_blocks: bool,
        html_elements: bool,
    ) -> crate::config::QuickmarkConfig {
        let proper_names = MD044ProperNamesTable {
            names,
            code_blocks,
            html_elements,
        };
        let settings = LintersSettingsTable {
            proper_names,
            ..Default::default()
        };

        test_config_with_settings(vec![("proper-names", RuleSeverity::Error)], settings)
    }

    /// Lints `input` as `test.md` with the given settings and returns the message of
    /// every reported violation, in reporting order.
    fn lint(names: &[&str], code_blocks: bool, html_elements: bool, input: &str) -> Vec<String> {
        let names = names.iter().map(|name| name.to_string()).collect();
        let config = test_config(names, code_blocks, html_elements);

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        violations
            .iter()
            .map(|violation| violation.message().to_string())
            .collect()
    }

    #[test]
    fn test_no_names_configured() {
        let messages = lint(&[], true, true, "This contains javascript and GitHub text.");
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_exact_match_no_violations() {
        let messages = lint(
            &["JavaScript", "GitHub"],
            true,
            true,
            "This text contains JavaScript and GitHub properly capitalized.",
        );
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_incorrect_capitalization() {
        let messages = lint(
            &["JavaScript"],
            true,
            true,
            "This text contains javascript with incorrect capitalization.",
        );
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Expected: JavaScript"));
        assert!(messages[0].contains("Actual: javascript"));
    }

    #[test]
    fn test_multiple_violations() {
        let messages = lint(
            &["JavaScript", "GitHub"],
            true,
            true,
            "We use javascript and github for development.",
        );
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_code_blocks_enabled() {
        let messages = lint(&["JavaScript"], true, true, "```\nlet x = javascript;\n```");
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_code_blocks_disabled() {
        let messages = lint(&["JavaScript"], false, true, "```\nlet x = javascript;\n```");
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_html_elements_enabled() {
        let messages = lint(&["JavaScript"], true, true, "<p>We use javascript here</p>");
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_html_elements_disabled() {
        let messages = lint(&["JavaScript"], true, false, "<p>We use javascript here</p>");
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_word_boundaries() {
        // "javascriptish" only contains the name as a prefix, so just the last word counts.
        let messages = lint(
            &["JavaScript"],
            true,
            true,
            "The javascriptish language is not javascript.",
        );
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_sorting_by_length() {
        let messages = lint(
            &["GitHub", "git"],
            true,
            true,
            "We use github for version control.",
        );
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Expected: GitHub"));
    }

    #[test]
    fn test_mixed_case_names() {
        let messages = lint(
            &["GitHub", "github.com"],
            true,
            true,
            "Visit github.com or use GITHUB for repos.",
        );
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Expected: GitHub"));
        assert!(messages[0].contains("Actual: GITHUB"));
    }
}