1use crate::utils::range_utils::calculate_match_range;
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use crate::utils::regex_cache::get_cached_regex;
7
/// Pattern for a reference definition line: at most three leading spaces,
/// `[label]:`, a whitespace-free destination, and an optional title wrapped in
/// double or single quotes. `(?m)` makes `^` and `$` anchor at line boundaries.
const REF_DEF_REGEX_STR: &str = r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#;
10
11mod md050_config;
12use md050_config::MD050Config;
13
/// Rule MD050: strong emphasis style should be consistent.
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Rule configuration; carries the requested `StrongStyle`.
    config: MD050Config,
}
23
24impl MD050StrongStyle {
25 pub fn new(style: StrongStyle) -> Self {
26 Self {
27 config: MD050Config { style },
28 }
29 }
30
/// Creates the rule from an already-built `MD050Config`.
pub fn from_config_struct(config: MD050Config) -> Self {
    Self { config }
}
34
35 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37 for link in &ctx.links {
39 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40 return true;
41 }
42 }
43
44 for image in &ctx.images {
46 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47 return true;
48 }
49 }
50
51 if let Ok(re) = get_cached_regex(REF_DEF_REGEX_STR) {
53 for m in re.find_iter(ctx.content) {
54 if m.start() <= byte_pos && byte_pos < m.end() {
55 return true;
56 }
57 }
58 }
59
60 false
61 }
62
63 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65 for html_tag in ctx.html_tags().iter() {
67 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
70 return true;
71 }
72 }
73 false
74 }
75
76 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
79 let html_tags = ctx.html_tags();
80 let mut open_code_pos: Option<usize> = None;
81
82 for tag in html_tags.iter() {
83 if tag.byte_offset > byte_pos {
85 return open_code_pos.is_some();
86 }
87
88 if tag.tag_name == "code" {
89 if tag.is_self_closing {
90 continue;
92 } else if !tag.is_closing {
93 open_code_pos = Some(tag.byte_end);
95 } else if tag.is_closing && open_code_pos.is_some() {
96 if let Some(open_pos) = open_code_pos
98 && byte_pos >= open_pos
99 && byte_pos < tag.byte_offset
100 {
101 return true;
103 }
104 open_code_pos = None;
105 }
106 }
107 }
108
109 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
111 }
112
113 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
114 let content = ctx.content;
115
116 let mut asterisk_count = 0;
118 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
119 let (line_num, _) = ctx.offset_to_line_col(m.start());
121 let in_front_matter = ctx
122 .line_info(line_num)
123 .map(|info| info.in_front_matter)
124 .unwrap_or(false);
125
126 if !in_front_matter
127 && !ctx.is_in_code_block_or_span(m.start())
128 && !self.is_in_link(ctx, m.start())
129 && !self.is_in_html_tag(ctx, m.start())
130 && !self.is_in_html_code_content(ctx, m.start())
131 {
132 asterisk_count += 1;
133 }
134 }
135
136 let mut underscore_count = 0;
137 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
138 let (line_num, _) = ctx.offset_to_line_col(m.start());
140 let in_front_matter = ctx
141 .line_info(line_num)
142 .map(|info| info.in_front_matter)
143 .unwrap_or(false);
144
145 if !in_front_matter
146 && !ctx.is_in_code_block_or_span(m.start())
147 && !self.is_in_link(ctx, m.start())
148 && !self.is_in_html_tag(ctx, m.start())
149 && !self.is_in_html_code_content(ctx, m.start())
150 {
151 underscore_count += 1;
152 }
153 }
154
155 match (asterisk_count, underscore_count) {
156 (0, 0) => None,
157 (_, 0) => Some(StrongStyle::Asterisk),
158 (0, _) => Some(StrongStyle::Underscore),
159 (a, u) => {
160 if a >= u {
163 Some(StrongStyle::Asterisk)
164 } else {
165 Some(StrongStyle::Underscore)
166 }
167 }
168 }
169 }
170
171 fn is_escaped(&self, text: &str, pos: usize) -> bool {
172 if pos == 0 {
173 return false;
174 }
175
176 let mut backslash_count = 0;
177 let mut i = pos;
178 let bytes = text.as_bytes();
179 while i > 0 {
180 i -= 1;
181 if i < bytes.len() && bytes[i] != b'\\' {
183 break;
184 }
185 backslash_count += 1;
186 }
187 backslash_count % 2 == 1
188 }
189}
190
191impl Rule for MD050StrongStyle {
/// Returns the rule identifier, `"MD050"`.
fn name(&self) -> &'static str {
    "MD050"
}
195
/// Returns the one-line description of the rule.
fn description(&self) -> &'static str {
    "Strong emphasis style should be consistent"
}
199
200 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
201 let content = ctx.content;
202 let line_index = &ctx.line_index;
203
204 let mut warnings = Vec::new();
205
206 let target_style = match self.config.style {
207 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
208 _ => self.config.style,
209 };
210
211 let strong_regex = match target_style {
212 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
213 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
214 StrongStyle::Consistent => {
215 &*BOLD_UNDERSCORE_REGEX
218 }
219 };
220
221 for (line_num, line) in content.lines().enumerate() {
222 if let Some(line_info) = ctx.line_info(line_num + 1)
224 && line_info.in_front_matter
225 {
226 continue;
227 }
228
229 let byte_pos = line_index.get_line_start_byte(line_num + 1).unwrap_or(0);
230
231 for m in strong_regex.find_iter(line) {
232 let match_byte_pos = byte_pos + m.start();
234
235 if ctx.is_in_code_block_or_span(match_byte_pos)
237 || self.is_in_link(ctx, match_byte_pos)
238 || self.is_in_html_code_content(ctx, match_byte_pos)
239 {
240 continue;
241 }
242
243 let mut inside_html_tag = false;
246 for tag in ctx.html_tags().iter() {
247 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
249 inside_html_tag = true;
250 break;
251 }
252 }
253 if inside_html_tag {
254 continue;
255 }
256
257 if !self.is_escaped(line, m.start()) {
258 let text = &line[m.start() + 2..m.end() - 2];
259
260 let message = match target_style {
267 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
268 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
269 StrongStyle::Consistent => "Strong emphasis should use ** instead of __",
270 };
271
272 let (start_line, start_col, end_line, end_col) =
274 calculate_match_range(line_num + 1, line, m.start(), m.len());
275
276 warnings.push(LintWarning {
277 rule_name: Some(self.name().to_string()),
278 line: start_line,
279 column: start_col,
280 end_line,
281 end_column: end_col,
282 message: message.to_string(),
283 severity: Severity::Warning,
284 fix: Some(Fix {
285 range: line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
286 replacement: match target_style {
287 StrongStyle::Asterisk => format!("**{text}**"),
288 StrongStyle::Underscore => format!("__{text}__"),
289 StrongStyle::Consistent => format!("**{text}**"),
290 },
291 }),
292 });
293 }
294 }
295 }
296
297 Ok(warnings)
298 }
299
300 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
301 let content = ctx.content;
302
303 let target_style = match self.config.style {
304 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
305 _ => self.config.style,
306 };
307
308 let strong_regex = match target_style {
309 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
310 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
311 StrongStyle::Consistent => {
312 &*BOLD_UNDERSCORE_REGEX
315 }
316 };
317
318 let matches: Vec<(usize, usize)> = strong_regex
321 .find_iter(content)
322 .filter(|m| {
323 let (line_num, _) = ctx.offset_to_line_col(m.start());
325 if let Some(line_info) = ctx.line_info(line_num)
326 && line_info.in_front_matter
327 {
328 return false;
329 }
330 !ctx.is_in_code_block_or_span(m.start())
331 && !self.is_in_link(ctx, m.start())
332 && !self.is_in_html_tag(ctx, m.start())
333 && !self.is_in_html_code_content(ctx, m.start())
334 })
335 .filter(|m| !self.is_escaped(content, m.start()))
336 .map(|m| (m.start(), m.end()))
337 .collect();
338
339 let mut result = content.to_string();
342 for (start, end) in matches.into_iter().rev() {
343 let text = &result[start + 2..end - 2];
344 let replacement = match target_style {
345 StrongStyle::Asterisk => format!("**{text}**"),
346 StrongStyle::Underscore => format!("__{text}__"),
347 StrongStyle::Consistent => {
348 format!("**{text}**")
351 }
352 };
353 result.replace_range(start..end, &replacement);
354 }
355
356 Ok(result)
357 }
358
359 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
361 ctx.content.is_empty() || !ctx.likely_has_emphasis()
363 }
364
/// Returns `self` as a `&dyn Any`.
fn as_any(&self) -> &dyn std::any::Any {
    self
}
368
369 fn default_config_section(&self) -> Option<(String, toml::Value)> {
370 let json_value = serde_json::to_value(&self.config).ok()?;
371 Some((
372 self.name().to_string(),
373 crate::rule_config_serde::json_to_toml_value(&json_value)?,
374 ))
375 }
376
377 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
378 where
379 Self: Sized,
380 {
381 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
382 Box::new(Self::from_config_struct(rule_config))
383 }
384}
385
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    const USE_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    const USE_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` for a rule configured with `style` over `content`.
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let rule = MD050StrongStyle::new(style);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let warnings = rule.check(&ctx).unwrap();
        warnings
    }

    /// Runs `fix` for a rule configured with `style` over `content`.
    fn autofix(style: StrongStyle, content: &str) -> String {
        let rule = MD050StrongStyle::new(style);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let fixed = rule.fix(&ctx).unwrap();
        fixed
    }

    /// Runs `detect_style` on `content` with a `Consistent` rule.
    fn detected_style(content: &str) -> Option<StrongStyle> {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        let style = rule.detect_style(&ctx);
        style
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let result = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let result = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains(USE_ASTERISKS));
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let result = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let result = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let result = lint(StrongStyle::Consistent, "First **strong** then __also strong__.");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_tie_prefers_asterisk() {
        let result = lint(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_detect_style_asterisk() {
        let style = detected_style("This has **strong** text.");

        assert_eq!(style, Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        let style = detected_style("This has __strong__ text.");

        assert_eq!(style, Some(StrongStyle::Underscore));
    }

    #[test]
    fn test_detect_style_none() {
        let style = detected_style("No strong text here.");

        assert_eq!(style, None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let result = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let result = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let result = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = autofix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = autofix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = autofix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = autofix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let result = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let result = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(result.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let result = lint(StrongStyle::Asterisk, "");

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let result = lint(StrongStyle::Asterisk, content);

        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let result = lint(StrongStyle::Asterisk, content);

        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains(USE_ASTERISKS));
        assert!(result[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let result = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 6);
        assert!(result[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let result = lint(StrongStyle::Asterisk, content);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 7);
        assert!(result[0].message.contains(USE_ASTERISKS));
    }
}