1use std::rc::Rc;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use tree_sitter::Node;
6
7use crate::{
8 linter::{range_from_tree_sitter, Context, RuleViolation},
9 rules::{Rule, RuleLinter, RuleType},
10};
11
12static ASTERISK_EMPHASIS_REGEX: Lazy<Regex> = Lazy::new(|| {
14 Regex::new(r"(\*{1,3})(\s*)([^*\n]*?)(\s*)(\*{1,3})").expect("Invalid asterisk emphasis regex")
15});
16
17static UNDERSCORE_EMPHASIS_REGEX: Lazy<Regex> = Lazy::new(|| {
18 Regex::new(r"(\_{1,3})(\s*)([^_\n]*?)(\s*)(\_{1,3})")
19 .expect("Invalid underscore emphasis regex")
20});
21
22static CODE_SPAN_REGEX: Lazy<Regex> =
24 Lazy::new(|| Regex::new(r"`[^`\n]*`").expect("Invalid code span regex"));
25
/// Linter state for rule MD037 ("Spaces inside emphasis markers").
pub(crate) struct MD037Linter {
    /// Shared lint context; this linter reads the document content and the
    /// file path from it.
    context: Rc<Context>,
    /// Violations collected while nodes are fed; handed out by `finalize`.
    violations: Vec<RuleViolation>,
}
30
31impl MD037Linter {
32 pub fn new(context: Rc<Context>) -> Self {
33 Self {
34 context,
35 violations: Vec::new(),
36 }
37 }
38
39 fn is_in_code_context(&self, node: &Node) -> bool {
40 let mut current = Some(*node);
42 while let Some(node_to_check) = current {
43 match node_to_check.kind() {
44 "code_span" | "fenced_code_block" | "indented_code_block" => {
45 return true;
46 }
47 _ => {
48 current = node_to_check.parent();
49 }
50 }
51 }
52 false
53 }
54
55 fn find_emphasis_violations_in_text(&mut self, node: &Node) {
56 if self.is_in_code_context(node) {
57 return;
58 }
59
60 let start_byte = node.start_byte();
61 let text = {
62 let source = self.context.get_document_content();
63 source[start_byte..node.end_byte()].to_string()
64 };
65
66 let code_span_ranges: Vec<(usize, usize)> = CODE_SPAN_REGEX
68 .find_iter(&text)
69 .map(|m| (m.start(), m.end()))
70 .collect();
71
72 self.check_emphasis_pattern(
74 &text,
75 start_byte,
76 &ASTERISK_EMPHASIS_REGEX,
77 &code_span_ranges,
78 );
79
80 self.check_emphasis_pattern(
82 &text,
83 start_byte,
84 &UNDERSCORE_EMPHASIS_REGEX,
85 &code_span_ranges,
86 );
87 }
88
89 fn check_emphasis_pattern(
90 &mut self,
91 text: &str,
92 text_start_byte: usize,
93 regex: &Regex,
94 code_span_ranges: &[(usize, usize)],
95 ) {
96 for capture in regex.captures_iter(text) {
97 if let (
98 Some(opening_marker),
99 Some(opening_space),
100 Some(_content),
101 Some(closing_space),
102 Some(closing_marker),
103 ) = (
104 capture.get(1),
105 capture.get(2),
106 capture.get(3),
107 capture.get(4),
108 capture.get(5),
109 ) {
110 let match_start = capture.get(0).unwrap().start();
112 let match_end = capture.get(0).unwrap().end();
113
114 let in_code_span = code_span_ranges.iter().any(|(code_start, code_end)| {
115 match_start < *code_end && match_end > *code_start
117 });
118
119 if in_code_span {
120 continue; }
122
123 let opening_text = opening_marker.as_str();
124 let closing_text = closing_marker.as_str();
125
126 if opening_text == closing_text {
128 if !opening_space.as_str().is_empty() {
130 self.create_opening_space_violation(
131 opening_marker,
132 opening_space,
133 text_start_byte,
134 );
135 }
136
137 if !closing_space.as_str().is_empty() {
139 self.create_closing_space_violation(
140 closing_marker,
141 closing_space,
142 text_start_byte,
143 );
144 }
145 }
146 }
147 }
148 }
149
150 fn create_opening_space_violation(
151 &mut self,
152 opening_marker: regex::Match,
153 opening_space: regex::Match,
154 text_start_byte: usize,
155 ) {
156 let marker = opening_marker.as_str();
157 let space = opening_space.as_str();
158 let violation_start = text_start_byte + opening_marker.end();
159 let violation_end = text_start_byte + opening_space.end();
160
161 let range = tree_sitter::Range {
162 start_byte: violation_start,
163 end_byte: violation_end,
164 start_point: self.byte_to_point(violation_start),
165 end_point: self.byte_to_point(violation_end),
166 };
167
168 self.violations.push(RuleViolation::new(
169 &MD037,
170 format!("{} [Context: \"{}{}\"]", MD037.description, marker, space),
171 self.context.file_path.clone(),
172 range_from_tree_sitter(&range),
173 ));
174 }
175
176 fn create_closing_space_violation(
177 &mut self,
178 closing_marker: regex::Match,
179 closing_space: regex::Match,
180 text_start_byte: usize,
181 ) {
182 let marker = closing_marker.as_str();
183 let space = closing_space.as_str();
184 let violation_start = text_start_byte + closing_space.start();
185 let violation_end = text_start_byte + closing_marker.end();
186
187 let range = tree_sitter::Range {
188 start_byte: violation_start,
189 end_byte: violation_end,
190 start_point: self.byte_to_point(violation_start),
191 end_point: self.byte_to_point(violation_end),
192 };
193
194 self.violations.push(RuleViolation::new(
195 &MD037,
196 format!("{} [Context: \"{}{}\"]", MD037.description, space, marker),
197 self.context.file_path.clone(),
198 range_from_tree_sitter(&range),
199 ));
200 }
201
202 fn byte_to_point(&self, byte_pos: usize) -> tree_sitter::Point {
203 let source = self.context.get_document_content();
204 let mut line = 0;
205 let mut column = 0;
206
207 for (i, ch) in source.char_indices() {
208 if i >= byte_pos {
209 break;
210 }
211 if ch == '\n' {
212 line += 1;
213 column = 0;
214 } else {
215 column += 1;
216 }
217 }
218
219 tree_sitter::Point { row: line, column }
220 }
221}
222
223impl RuleLinter for MD037Linter {
224 fn feed(&mut self, node: &Node) {
225 match node.kind() {
226 "text" | "inline" => {
228 self.find_emphasis_violations_in_text(node);
229 }
230 _ => {}
231 }
232 }
233
234 fn finalize(&mut self) -> Vec<RuleViolation> {
235 std::mem::take(&mut self.violations)
236 }
237}
238
/// Rule descriptor for MD037 (`no-space-in-emphasis`): reports whitespace
/// placed directly inside emphasis markers.
///
/// NOTE(review): `required_nodes` lists `emphasis` and `strong_emphasis`,
/// while `MD037Linter::feed` only reacts to `text` and `inline` nodes —
/// confirm how the framework uses this list.
pub const MD037: Rule = Rule {
    id: "MD037",
    alias: "no-space-in-emphasis",
    tags: &["whitespace", "emphasis"],
    description: "Spaces inside emphasis markers",
    rule_type: RuleType::Token,
    required_nodes: &["emphasis", "strong_emphasis"],
    // Factory producing a fresh linter for the given context.
    new_linter: |context| Box::new(MD037Linter::new(context)),
};
248
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Configuration with only `no-space-in-emphasis` enabled, as an error.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![("no-space-in-emphasis", RuleSeverity::Error)])
    }

    /// Lints `input` as `test.md` and returns how many of the reported
    /// violations belong to MD037.
    fn count_md037_violations(input: &str) -> usize {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        violations
            .iter()
            .filter(|violation| violation.rule().id == "MD037")
            .count()
    }

    #[test]
    fn test_no_violations_valid_emphasis() {
        let input = "This has *valid emphasis* and **valid strong** text.
Also _valid emphasis_ and __valid strong__ text.
And ***valid strong emphasis*** and ___valid strong emphasis___ text.";

        assert_eq!(count_md037_violations(input), 0);
    }

    #[test]
    fn test_violations_spaces_inside_single_asterisk() {
        let input = "This has * invalid emphasis * with spaces inside.";

        // One violation for the opening side, one for the closing side.
        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_double_asterisk() {
        let input = "This has ** invalid strong ** with spaces inside.";

        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_triple_asterisk() {
        let input = "This has *** invalid strong emphasis *** with spaces inside.";

        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_single_underscore() {
        let input = "This has _ invalid emphasis _ with spaces inside.";

        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_double_underscore() {
        let input = "This has __ invalid strong __ with spaces inside.";

        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_violations_spaces_inside_triple_underscore() {
        let input = "This has ___ invalid strong emphasis ___ with spaces inside.";

        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_violations_mixed_valid_and_invalid() {
        let input = "Mix of *valid* and * invalid * emphasis.
Also **valid** and ** invalid ** strong.";

        // Two invalid runs, each flagged on both sides.
        assert_eq!(count_md037_violations(input), 4);
    }

    #[test]
    fn test_violations_one_sided_spaces() {
        let input = "One sided *invalid * and * invalid* emphasis.";

        // One closing-side and one opening-side violation.
        assert_eq!(count_md037_violations(input), 2);
    }

    #[test]
    fn test_no_violations_in_code_blocks() {
        let input = "Regular text with *valid* emphasis.

```markdown
This should not trigger * invalid * emphasis in code blocks.
```

More text with _valid_ emphasis.";

        assert_eq!(count_md037_violations(input), 0);
    }

    #[test]
    fn test_no_violations_in_code_spans() {
        let input = "Regular text with `* invalid * code spans` should not trigger violations.";

        assert_eq!(count_md037_violations(input), 0);
    }
}