1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use crate::utils::regex_cache::get_cached_regex;
7
/// Pattern for a link reference definition line (multi-line mode): up to three
/// leading spaces, a `[label]:` prefix, a whitespace-free destination, and an
/// optional title in double or single quotes that runs to the end of the line.
const REF_DEF_REGEX_STR: &str = r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#;
10
11mod md050_config;
12use md050_config::MD050Config;
13
/// Lint rule MD050: strong emphasis should use one delimiter style consistently.
///
/// The style to enforce is taken from the wrapped [`MD050Config`].
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Rule configuration; its `style` field selects the delimiter to enforce.
    config: MD050Config,
}
23
24impl MD050StrongStyle {
25 pub fn new(style: StrongStyle) -> Self {
26 Self {
27 config: MD050Config { style },
28 }
29 }
30
31 pub fn from_config_struct(config: MD050Config) -> Self {
32 Self { config }
33 }
34
35 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37 for link in &ctx.links {
39 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40 return true;
41 }
42 }
43
44 for image in &ctx.images {
46 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47 return true;
48 }
49 }
50
51 if let Ok(re) = get_cached_regex(REF_DEF_REGEX_STR) {
53 for m in re.find_iter(ctx.content) {
54 if m.start() <= byte_pos && byte_pos < m.end() {
55 return true;
56 }
57 }
58 }
59
60 false
61 }
62
63 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65 for html_tag in ctx.html_tags().iter() {
67 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
70 return true;
71 }
72 }
73 false
74 }
75
76 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
79 let html_tags = ctx.html_tags();
80 let mut open_code_pos: Option<usize> = None;
81
82 for tag in html_tags.iter() {
83 if tag.byte_offset > byte_pos {
85 return open_code_pos.is_some();
86 }
87
88 if tag.tag_name == "code" {
89 if tag.is_self_closing {
90 continue;
92 } else if !tag.is_closing {
93 open_code_pos = Some(tag.byte_end);
95 } else if tag.is_closing && open_code_pos.is_some() {
96 if let Some(open_pos) = open_code_pos
98 && byte_pos >= open_pos
99 && byte_pos < tag.byte_offset
100 {
101 return true;
103 }
104 open_code_pos = None;
105 }
106 }
107 }
108
109 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
111 }
112
113 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
114 let content = ctx.content;
115
116 let mut first_asterisk = None;
118 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
119 let (line_num, _) = ctx.offset_to_line_col(m.start());
121 let in_front_matter = ctx
122 .line_info(line_num)
123 .map(|info| info.in_front_matter)
124 .unwrap_or(false);
125
126 if !in_front_matter
127 && !ctx.is_in_code_block_or_span(m.start())
128 && !self.is_in_link(ctx, m.start())
129 && !self.is_in_html_tag(ctx, m.start())
130 && !self.is_in_html_code_content(ctx, m.start())
131 {
132 first_asterisk = Some(m);
133 break;
134 }
135 }
136
137 let mut first_underscore = None;
138 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
139 let (line_num, _) = ctx.offset_to_line_col(m.start());
141 let in_front_matter = ctx
142 .line_info(line_num)
143 .map(|info| info.in_front_matter)
144 .unwrap_or(false);
145
146 if !in_front_matter
147 && !ctx.is_in_code_block_or_span(m.start())
148 && !self.is_in_link(ctx, m.start())
149 && !self.is_in_html_tag(ctx, m.start())
150 && !self.is_in_html_code_content(ctx, m.start())
151 {
152 first_underscore = Some(m);
153 break;
154 }
155 }
156
157 match (first_asterisk, first_underscore) {
158 (Some(a), Some(u)) => {
159 if a.start() < u.start() {
161 Some(StrongStyle::Asterisk)
162 } else {
163 Some(StrongStyle::Underscore)
164 }
165 }
166 (Some(_), None) => Some(StrongStyle::Asterisk),
167 (None, Some(_)) => Some(StrongStyle::Underscore),
168 (None, None) => None,
169 }
170 }
171
172 fn is_escaped(&self, text: &str, pos: usize) -> bool {
173 if pos == 0 {
174 return false;
175 }
176
177 let mut backslash_count = 0;
178 let mut i = pos;
179 let bytes = text.as_bytes();
180 while i > 0 {
181 i -= 1;
182 if i < bytes.len() && bytes[i] != b'\\' {
184 break;
185 }
186 backslash_count += 1;
187 }
188 backslash_count % 2 == 1
189 }
190}
191
192impl Rule for MD050StrongStyle {
/// Returns the rule identifier, `"MD050"`.
fn name(&self) -> &'static str {
    "MD050"
}
196
/// Returns the one-line, human-readable summary of what this rule enforces.
fn description(&self) -> &'static str {
    "Strong emphasis style should be consistent"
}
200
201 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
202 let content = ctx.content;
203 let _line_index = LineIndex::new(content.to_string());
204
205 let mut warnings = Vec::new();
206
207 let target_style = match self.config.style {
208 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
209 _ => self.config.style,
210 };
211
212 let strong_regex = match target_style {
213 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
214 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
215 StrongStyle::Consistent => {
216 &*BOLD_UNDERSCORE_REGEX
219 }
220 };
221
222 let mut byte_pos = 0;
224
225 for (line_num, line) in content.lines().enumerate() {
226 if let Some(line_info) = ctx.line_info(line_num + 1)
228 && line_info.in_front_matter
229 {
230 byte_pos += line.len() + 1; continue;
232 }
233
234 for m in strong_regex.find_iter(line) {
235 let match_byte_pos = byte_pos + m.start();
237
238 if ctx.is_in_code_block_or_span(match_byte_pos)
240 || self.is_in_link(ctx, match_byte_pos)
241 || self.is_in_html_code_content(ctx, match_byte_pos)
242 {
243 continue;
244 }
245
246 let mut inside_html_tag = false;
249 for tag in ctx.html_tags().iter() {
250 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
252 inside_html_tag = true;
253 break;
254 }
255 }
256 if inside_html_tag {
257 continue;
258 }
259
260 if !self.is_escaped(line, m.start()) {
261 let text = &line[m.start() + 2..m.end() - 2];
262 let message = match target_style {
263 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
264 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
265 StrongStyle::Consistent => {
266 "Strong emphasis should use ** instead of __"
269 }
270 };
271
272 let (start_line, start_col, end_line, end_col) =
274 calculate_match_range(line_num + 1, line, m.start(), m.len());
275
276 warnings.push(LintWarning {
277 rule_name: Some(self.name()),
278 line: start_line,
279 column: start_col,
280 end_line,
281 end_column: end_col,
282 message: message.to_string(),
283 severity: Severity::Warning,
284 fix: Some(Fix {
285 range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
286 replacement: match target_style {
287 StrongStyle::Asterisk => format!("**{text}**"),
288 StrongStyle::Underscore => format!("__{text}__"),
289 StrongStyle::Consistent => {
290 format!("**{text}**")
293 }
294 },
295 }),
296 });
297 }
298 }
299
300 byte_pos += line.len() + 1; }
303
304 Ok(warnings)
305 }
306
307 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
308 let content = ctx.content;
309
310 let target_style = match self.config.style {
311 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
312 _ => self.config.style,
313 };
314
315 let strong_regex = match target_style {
316 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
317 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
318 StrongStyle::Consistent => {
319 &*BOLD_UNDERSCORE_REGEX
322 }
323 };
324
325 let matches: Vec<(usize, usize)> = strong_regex
328 .find_iter(content)
329 .filter(|m| {
330 let (line_num, _) = ctx.offset_to_line_col(m.start());
332 if let Some(line_info) = ctx.line_info(line_num)
333 && line_info.in_front_matter
334 {
335 return false;
336 }
337 !ctx.is_in_code_block_or_span(m.start())
338 && !self.is_in_link(ctx, m.start())
339 && !self.is_in_html_tag(ctx, m.start())
340 && !self.is_in_html_code_content(ctx, m.start())
341 })
342 .filter(|m| !self.is_escaped(content, m.start()))
343 .map(|m| (m.start(), m.end()))
344 .collect();
345
346 let mut result = content.to_string();
349 for (start, end) in matches.into_iter().rev() {
350 let text = &result[start + 2..end - 2];
351 let replacement = match target_style {
352 StrongStyle::Asterisk => format!("**{text}**"),
353 StrongStyle::Underscore => format!("__{text}__"),
354 StrongStyle::Consistent => {
355 format!("**{text}**")
358 }
359 };
360 result.replace_range(start..end, &replacement);
361 }
362
363 Ok(result)
364 }
365
366 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
368 ctx.content.is_empty() || !ctx.likely_has_emphasis()
370 }
371
/// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
fn as_any(&self) -> &dyn std::any::Any {
    self
}
375
376 fn default_config_section(&self) -> Option<(String, toml::Value)> {
377 let json_value = serde_json::to_value(&self.config).ok()?;
378 Some((
379 self.name().to_string(),
380 crate::rule_config_serde::json_to_toml_value(&json_value)?,
381 ))
382 }
383
384 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
385 where
386 Self: Sized,
387 {
388 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
389 Box::new(Self::from_config_struct(rule_config))
390 }
391}
392
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message expected when `__` is found but `**` is required.
    const USE_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    /// Message expected when `**` is found but `__` is required.
    const USE_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` with `style` over `content` (standard flavor) and returns the warnings.
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(style);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` with `style` over `content` (standard flavor) and returns the new text.
    fn apply_fix(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(style);
        rule.fix(&ctx).unwrap()
    }

    /// Runs `detect_style` on `content` using a rule configured as `Consistent`.
    fn detected_style(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        rule.detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert!(first.message.contains(USE_ASTERISKS));
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = lint(StrongStyle::Consistent, "First **strong** then __also strong__.");

        // Asterisks come first, so only the underscore span is reported.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let warnings = lint(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        // Underscores come first, so only the asterisk span is reported.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(detected_style("This has **strong** text."), Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(
            detected_style("This has __strong__ text."),
            Some(StrongStyle::Underscore)
        );
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detected_style("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        // Only the occurrence after the fenced block is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        // Only the occurrence outside the code span is reported.
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        // The escaped markers are ignored; only the unescaped span is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = apply_fix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = apply_fix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(StrongStyle::Asterisk, "");

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_default_config() {
        let (section_name, _section_value) = MD050StrongStyle::new(StrongStyle::Consistent)
            .default_config_section()
            .unwrap();

        assert_eq!(section_name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Every `__` here sits in a link, a URL, a code span, or a reference definition.
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Only the plain-text strong span is reported.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
        assert!(warnings[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        // Front-matter lines are ignored; only the body line is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Underscores inside the tag's attribute are ignored.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }
}