1use crate::utils::range_utils::calculate_match_range;
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use crate::utils::regex_cache::get_cached_regex;
7
/// Pattern (multi-line mode) for a reference definition line: up to three
/// leading spaces, a bracketed label followed by `:`, a destination made of
/// non-whitespace characters, and an optional title in double or single quotes.
const REF_DEF_REGEX_STR: &str =
    r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#;
10
11mod md050_config;
12use md050_config::MD050Config;
13
14#[derive(Debug, Default, Clone)]
20pub struct MD050StrongStyle {
21 config: MD050Config,
22}
23
24impl MD050StrongStyle {
25 pub fn new(style: StrongStyle) -> Self {
26 Self {
27 config: MD050Config { style },
28 }
29 }
30
31 pub fn from_config_struct(config: MD050Config) -> Self {
32 Self { config }
33 }
34
35 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37 for link in &ctx.links {
39 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40 return true;
41 }
42 }
43
44 for image in &ctx.images {
46 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47 return true;
48 }
49 }
50
51 if let Ok(re) = get_cached_regex(REF_DEF_REGEX_STR) {
53 for m in re.find_iter(ctx.content) {
54 if m.start() <= byte_pos && byte_pos < m.end() {
55 return true;
56 }
57 }
58 }
59
60 false
61 }
62
63 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65 for html_tag in ctx.html_tags().iter() {
67 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
70 return true;
71 }
72 }
73 false
74 }
75
76 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
79 let html_tags = ctx.html_tags();
80 let mut open_code_pos: Option<usize> = None;
81
82 for tag in html_tags.iter() {
83 if tag.byte_offset > byte_pos {
85 return open_code_pos.is_some();
86 }
87
88 if tag.tag_name == "code" {
89 if tag.is_self_closing {
90 continue;
92 } else if !tag.is_closing {
93 open_code_pos = Some(tag.byte_end);
95 } else if tag.is_closing && open_code_pos.is_some() {
96 if let Some(open_pos) = open_code_pos
98 && byte_pos >= open_pos
99 && byte_pos < tag.byte_offset
100 {
101 return true;
103 }
104 open_code_pos = None;
105 }
106 }
107 }
108
109 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
111 }
112
113 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
114 let content = ctx.content;
115
116 let mut asterisk_count = 0;
118 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
119 let (line_num, _) = ctx.offset_to_line_col(m.start());
121 let in_front_matter = ctx
122 .line_info(line_num)
123 .map(|info| info.in_front_matter)
124 .unwrap_or(false);
125
126 if !in_front_matter
127 && !ctx.is_in_code_block_or_span(m.start())
128 && !self.is_in_link(ctx, m.start())
129 && !self.is_in_html_tag(ctx, m.start())
130 && !self.is_in_html_code_content(ctx, m.start())
131 {
132 asterisk_count += 1;
133 }
134 }
135
136 let mut underscore_count = 0;
137 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
138 let (line_num, _) = ctx.offset_to_line_col(m.start());
140 let in_front_matter = ctx
141 .line_info(line_num)
142 .map(|info| info.in_front_matter)
143 .unwrap_or(false);
144
145 if !in_front_matter
146 && !ctx.is_in_code_block_or_span(m.start())
147 && !self.is_in_link(ctx, m.start())
148 && !self.is_in_html_tag(ctx, m.start())
149 && !self.is_in_html_code_content(ctx, m.start())
150 {
151 underscore_count += 1;
152 }
153 }
154
155 match (asterisk_count, underscore_count) {
156 (0, 0) => None,
157 (_, 0) => Some(StrongStyle::Asterisk),
158 (0, _) => Some(StrongStyle::Underscore),
159 (a, u) => {
160 if a >= u {
163 Some(StrongStyle::Asterisk)
164 } else {
165 Some(StrongStyle::Underscore)
166 }
167 }
168 }
169 }
170
171 fn is_escaped(&self, text: &str, pos: usize) -> bool {
172 if pos == 0 {
173 return false;
174 }
175
176 let mut backslash_count = 0;
177 let mut i = pos;
178 let bytes = text.as_bytes();
179 while i > 0 {
180 i -= 1;
181 if i < bytes.len() && bytes[i] != b'\\' {
183 break;
184 }
185 backslash_count += 1;
186 }
187 backslash_count % 2 == 1
188 }
189}
190
191impl Rule for MD050StrongStyle {
192 fn name(&self) -> &'static str {
193 "MD050"
194 }
195
196 fn description(&self) -> &'static str {
197 "Strong emphasis style should be consistent"
198 }
199
200 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
201 let content = ctx.content;
202 let line_index = &ctx.line_index;
203
204 let mut warnings = Vec::new();
205
206 let target_style = match self.config.style {
207 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
208 _ => self.config.style,
209 };
210
211 let strong_regex = match target_style {
212 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
213 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
214 StrongStyle::Consistent => {
215 &*BOLD_UNDERSCORE_REGEX
218 }
219 };
220
221 for (line_num, line) in content.lines().enumerate() {
222 if let Some(line_info) = ctx.line_info(line_num + 1)
224 && line_info.in_front_matter
225 {
226 continue;
227 }
228
229 let byte_pos = line_index.get_line_start_byte(line_num + 1).unwrap_or(0);
230
231 for m in strong_regex.find_iter(line) {
232 let match_byte_pos = byte_pos + m.start();
234
235 if ctx.is_in_code_block_or_span(match_byte_pos)
237 || self.is_in_link(ctx, match_byte_pos)
238 || self.is_in_html_code_content(ctx, match_byte_pos)
239 {
240 continue;
241 }
242
243 let mut inside_html_tag = false;
246 for tag in ctx.html_tags().iter() {
247 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
249 inside_html_tag = true;
250 break;
251 }
252 }
253 if inside_html_tag {
254 continue;
255 }
256
257 if !self.is_escaped(line, m.start()) {
258 let text = &line[m.start() + 2..m.end() - 2];
259 let message = match target_style {
260 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
261 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
262 StrongStyle::Consistent => {
263 "Strong emphasis should use ** instead of __"
266 }
267 };
268
269 let (start_line, start_col, end_line, end_col) =
271 calculate_match_range(line_num + 1, line, m.start(), m.len());
272
273 warnings.push(LintWarning {
274 rule_name: Some(self.name().to_string()),
275 line: start_line,
276 column: start_col,
277 end_line,
278 end_column: end_col,
279 message: message.to_string(),
280 severity: Severity::Warning,
281 fix: Some(Fix {
282 range: line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
283 replacement: match target_style {
284 StrongStyle::Asterisk => format!("**{text}**"),
285 StrongStyle::Underscore => format!("__{text}__"),
286 StrongStyle::Consistent => {
287 format!("**{text}**")
290 }
291 },
292 }),
293 });
294 }
295 }
296 }
297
298 Ok(warnings)
299 }
300
301 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
302 let content = ctx.content;
303
304 let target_style = match self.config.style {
305 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
306 _ => self.config.style,
307 };
308
309 let strong_regex = match target_style {
310 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
311 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
312 StrongStyle::Consistent => {
313 &*BOLD_UNDERSCORE_REGEX
316 }
317 };
318
319 let matches: Vec<(usize, usize)> = strong_regex
322 .find_iter(content)
323 .filter(|m| {
324 let (line_num, _) = ctx.offset_to_line_col(m.start());
326 if let Some(line_info) = ctx.line_info(line_num)
327 && line_info.in_front_matter
328 {
329 return false;
330 }
331 !ctx.is_in_code_block_or_span(m.start())
332 && !self.is_in_link(ctx, m.start())
333 && !self.is_in_html_tag(ctx, m.start())
334 && !self.is_in_html_code_content(ctx, m.start())
335 })
336 .filter(|m| !self.is_escaped(content, m.start()))
337 .map(|m| (m.start(), m.end()))
338 .collect();
339
340 let mut result = content.to_string();
343 for (start, end) in matches.into_iter().rev() {
344 let text = &result[start + 2..end - 2];
345 let replacement = match target_style {
346 StrongStyle::Asterisk => format!("**{text}**"),
347 StrongStyle::Underscore => format!("__{text}__"),
348 StrongStyle::Consistent => {
349 format!("**{text}**")
352 }
353 };
354 result.replace_range(start..end, &replacement);
355 }
356
357 Ok(result)
358 }
359
360 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
362 ctx.content.is_empty() || !ctx.likely_has_emphasis()
364 }
365
366 fn as_any(&self) -> &dyn std::any::Any {
367 self
368 }
369
370 fn default_config_section(&self) -> Option<(String, toml::Value)> {
371 let json_value = serde_json::to_value(&self.config).ok()?;
372 Some((
373 self.name().to_string(),
374 crate::rule_config_serde::json_to_toml_value(&json_value)?,
375 ))
376 }
377
378 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
379 where
380 Self: Sized,
381 {
382 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
383 Box::new(Self::from_config_struct(rule_config))
384 }
385}
386
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message expected when the target style is asterisks.
    const USE_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    /// Message expected when the target style is underscores.
    const USE_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` with a rule configured for `style` over `content`
    /// (standard Markdown flavor) and returns the warnings.
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).check(&ctx).unwrap()
    }

    /// Runs `fix` with a rule configured for `style` over `content`
    /// (standard Markdown flavor) and returns the rewritten text.
    fn apply_fix(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).fix(&ctx).unwrap()
    }

    /// Runs `detect_style` on `content` using a `Consistent` rule.
    fn detect(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(StrongStyle::Consistent).detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = lint(
            StrongStyle::Consistent,
            "First **strong** then __also strong__.",
        );

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_tie_prefers_asterisk() {
        let warnings = lint(
            StrongStyle::Consistent,
            "First __strong__ then **also strong**.",
        );

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(
            detect("This has **strong** text."),
            Some(StrongStyle::Asterisk)
        );
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(
            detect("This has __strong__ text."),
            Some(StrongStyle::Underscore)
        );
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detect("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = apply_fix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = apply_fix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(StrongStyle::Asterisk, "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();

        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
        assert!(warnings[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }
}