1use crate::utils::range_utils::calculate_match_range;
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use crate::utils::regex_cache::get_cached_regex;
7
/// Pattern source for reference definition lines such as `[label]: destination "title"`.
///
/// In multi-line mode it matches a line made of up to three leading spaces, a bracketed
/// label, a colon, a whitespace-free destination and an optional title wrapped in double or
/// single quotes.
const REF_DEF_REGEX_STR: &str = r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#;
10
11mod md050_config;
12use md050_config::MD050Config;
13
/// Lint rule MD050: strong emphasis style should be consistent.
///
/// The rule carries only its configuration; the configured `style` decides which strong
/// emphasis marker is expected.
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    // Rule configuration, including the expected strong emphasis `style`.
    config: MD050Config,
}
23
24impl MD050StrongStyle {
25 pub fn new(style: StrongStyle) -> Self {
26 Self {
27 config: MD050Config { style },
28 }
29 }
30
31 pub fn from_config_struct(config: MD050Config) -> Self {
32 Self { config }
33 }
34
35 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37 for link in &ctx.links {
39 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40 return true;
41 }
42 }
43
44 for image in &ctx.images {
46 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47 return true;
48 }
49 }
50
51 if let Ok(re) = get_cached_regex(REF_DEF_REGEX_STR) {
53 for m in re.find_iter(ctx.content) {
54 if m.start() <= byte_pos && byte_pos < m.end() {
55 return true;
56 }
57 }
58 }
59
60 false
61 }
62
63 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65 for html_tag in ctx.html_tags().iter() {
67 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
70 return true;
71 }
72 }
73 false
74 }
75
76 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
79 let html_tags = ctx.html_tags();
80 let mut open_code_pos: Option<usize> = None;
81
82 for tag in html_tags.iter() {
83 if tag.byte_offset > byte_pos {
85 return open_code_pos.is_some();
86 }
87
88 if tag.tag_name == "code" {
89 if tag.is_self_closing {
90 continue;
92 } else if !tag.is_closing {
93 open_code_pos = Some(tag.byte_end);
95 } else if tag.is_closing && open_code_pos.is_some() {
96 if let Some(open_pos) = open_code_pos
98 && byte_pos >= open_pos
99 && byte_pos < tag.byte_offset
100 {
101 return true;
103 }
104 open_code_pos = None;
105 }
106 }
107 }
108
109 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
111 }
112
113 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
114 let content = ctx.content;
115
116 let mut asterisk_count = 0;
118 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
119 let (line_num, _) = ctx.offset_to_line_col(m.start());
121 let in_front_matter = ctx
122 .line_info(line_num)
123 .map(|info| info.in_front_matter)
124 .unwrap_or(false);
125
126 if !in_front_matter
127 && !ctx.is_in_code_block_or_span(m.start())
128 && !self.is_in_link(ctx, m.start())
129 && !self.is_in_html_tag(ctx, m.start())
130 && !self.is_in_html_code_content(ctx, m.start())
131 {
132 asterisk_count += 1;
133 }
134 }
135
136 let mut underscore_count = 0;
137 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
138 let (line_num, _) = ctx.offset_to_line_col(m.start());
140 let in_front_matter = ctx
141 .line_info(line_num)
142 .map(|info| info.in_front_matter)
143 .unwrap_or(false);
144
145 if !in_front_matter
146 && !ctx.is_in_code_block_or_span(m.start())
147 && !self.is_in_link(ctx, m.start())
148 && !self.is_in_html_tag(ctx, m.start())
149 && !self.is_in_html_code_content(ctx, m.start())
150 {
151 underscore_count += 1;
152 }
153 }
154
155 match (asterisk_count, underscore_count) {
156 (0, 0) => None,
157 (_, 0) => Some(StrongStyle::Asterisk),
158 (0, _) => Some(StrongStyle::Underscore),
159 (a, u) => {
160 if a >= u {
163 Some(StrongStyle::Asterisk)
164 } else {
165 Some(StrongStyle::Underscore)
166 }
167 }
168 }
169 }
170
171 fn is_escaped(&self, text: &str, pos: usize) -> bool {
172 if pos == 0 {
173 return false;
174 }
175
176 let mut backslash_count = 0;
177 let mut i = pos;
178 let bytes = text.as_bytes();
179 while i > 0 {
180 i -= 1;
181 if i < bytes.len() && bytes[i] != b'\\' {
183 break;
184 }
185 backslash_count += 1;
186 }
187 backslash_count % 2 == 1
188 }
189}
190
191impl Rule for MD050StrongStyle {
192 fn name(&self) -> &'static str {
193 "MD050"
194 }
195
196 fn description(&self) -> &'static str {
197 "Strong emphasis style should be consistent"
198 }
199
200 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
201 let content = ctx.content;
202 let line_index = &ctx.line_index;
203
204 let mut warnings = Vec::new();
205
206 let target_style = match self.config.style {
207 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
208 _ => self.config.style,
209 };
210
211 let strong_regex = match target_style {
212 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
213 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
214 StrongStyle::Consistent => {
215 &*BOLD_UNDERSCORE_REGEX
218 }
219 };
220
221 for (line_num, line) in content.lines().enumerate() {
222 if let Some(line_info) = ctx.line_info(line_num + 1)
224 && line_info.in_front_matter
225 {
226 continue;
227 }
228
229 let byte_pos = line_index.get_line_start_byte(line_num + 1).unwrap_or(0);
230
231 for m in strong_regex.find_iter(line) {
232 let match_byte_pos = byte_pos + m.start();
234
235 if ctx.is_in_code_block_or_span(match_byte_pos)
237 || self.is_in_link(ctx, match_byte_pos)
238 || self.is_in_html_code_content(ctx, match_byte_pos)
239 {
240 continue;
241 }
242
243 let mut inside_html_tag = false;
246 for tag in ctx.html_tags().iter() {
247 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
249 inside_html_tag = true;
250 break;
251 }
252 }
253 if inside_html_tag {
254 continue;
255 }
256
257 if !self.is_escaped(line, m.start()) {
258 let text = &line[m.start() + 2..m.end() - 2];
259 let message = match target_style {
260 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
261 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
262 StrongStyle::Consistent => {
263 "Strong emphasis should use ** instead of __"
266 }
267 };
268
269 let (start_line, start_col, end_line, end_col) =
271 calculate_match_range(line_num + 1, line, m.start(), m.len());
272
273 warnings.push(LintWarning {
274 rule_name: Some(self.name().to_string()),
275 line: start_line,
276 column: start_col,
277 end_line,
278 end_column: end_col,
279 message: message.to_string(),
280 severity: Severity::Warning,
281 fix: Some(Fix {
282 range: line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
283 replacement: match target_style {
284 StrongStyle::Asterisk => format!("**{text}**"),
285 StrongStyle::Underscore => format!("__{text}__"),
286 StrongStyle::Consistent => {
287 format!("**{text}**")
290 }
291 },
292 }),
293 });
294 }
295 }
296 }
297
298 Ok(warnings)
299 }
300
301 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
302 let content = ctx.content;
303
304 let target_style = match self.config.style {
305 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
306 _ => self.config.style,
307 };
308
309 let strong_regex = match target_style {
310 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
311 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
312 StrongStyle::Consistent => {
313 &*BOLD_UNDERSCORE_REGEX
316 }
317 };
318
319 let matches: Vec<(usize, usize)> = strong_regex
322 .find_iter(content)
323 .filter(|m| {
324 let (line_num, _) = ctx.offset_to_line_col(m.start());
326 if let Some(line_info) = ctx.line_info(line_num)
327 && line_info.in_front_matter
328 {
329 return false;
330 }
331 !ctx.is_in_code_block_or_span(m.start())
332 && !self.is_in_link(ctx, m.start())
333 && !self.is_in_html_tag(ctx, m.start())
334 && !self.is_in_html_code_content(ctx, m.start())
335 })
336 .filter(|m| !self.is_escaped(content, m.start()))
337 .map(|m| (m.start(), m.end()))
338 .collect();
339
340 let mut result = content.to_string();
343 for (start, end) in matches.into_iter().rev() {
344 let text = &result[start + 2..end - 2];
345 let replacement = match target_style {
346 StrongStyle::Asterisk => format!("**{text}**"),
347 StrongStyle::Underscore => format!("__{text}__"),
348 StrongStyle::Consistent => {
349 format!("**{text}**")
352 }
353 };
354 result.replace_range(start..end, &replacement);
355 }
356
357 Ok(result)
358 }
359
360 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
362 ctx.content.is_empty() || !ctx.likely_has_emphasis()
364 }
365
    /// Exposes this rule as a `&dyn Any` reference.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
369
370 fn default_config_section(&self) -> Option<(String, toml::Value)> {
371 let json_value = serde_json::to_value(&self.config).ok()?;
372 Some((
373 self.name().to_string(),
374 crate::rule_config_serde::json_to_toml_value(&json_value)?,
375 ))
376 }
377
378 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
379 where
380 Self: Sized,
381 {
382 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
383 Box::new(Self::from_config_struct(rule_config))
384 }
385}
386
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message expected when underscores are found but asterisks are wanted.
    const WANT_ASTERISK_MSG: &str = "Strong emphasis should use ** instead of __";
    /// Message expected when asterisks are found but underscores are wanted.
    const WANT_UNDERSCORE_MSG: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` for a rule configured with `style` over `content` (standard flavor).
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let rule = MD050StrongStyle::new(style);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` for a rule configured with `style` over `content` (standard flavor).
    fn apply_fix(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let rule = MD050StrongStyle::new(style);
        rule.fix(&ctx).unwrap()
    }

    /// Runs `detect_style` on `content` with a rule in `Consistent` mode.
    fn detected_style(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        rule.detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert!(first.message.contains(WANT_ASTERISK_MSG));
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_UNDERSCORE_MSG));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = lint(StrongStyle::Consistent, "First **strong** then __also strong__.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISK_MSG));
    }

    #[test]
    fn test_consistent_style_tie_prefers_asterisk() {
        let warnings = lint(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISK_MSG));
    }

    #[test]
    fn test_detect_style_asterisk() {
        let style = detected_style("This has **strong** text.");

        assert_eq!(style, Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        let style = detected_style("This has __strong__ text.");

        assert_eq!(style, Some(StrongStyle::Underscore));
    }

    #[test]
    fn test_detect_style_none() {
        let style = detected_style("No strong text here.");

        assert_eq!(style, None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = apply_fix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = apply_fix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(StrongStyle::Asterisk, "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        // Underscore runs appear only in code spans, link text/labels, a URL, a link title
        // and a reference definition.
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        // Only the plain `__real strong text__` paragraph should be reported.
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISK_MSG));
        assert!(warnings[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6);
        assert!(warnings[0].message.contains(WANT_ASTERISK_MSG));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        // The underscores inside the iframe attribute must be ignored.
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
        assert!(warnings[0].message.contains(WANT_ASTERISK_MSG));
    }
}