1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use lazy_static::lazy_static;
7use regex::Regex;
8
lazy_static! {
    /// Matches one link reference definition line, e.g. `[label]: https://example.com "Title"`.
    ///
    /// The pattern is multi-line (`(?m)`), allows up to three leading spaces, captures the
    /// bracketed label, the whitespace-free destination, and an optional title wrapped in
    /// double or single quotes. `is_in_link` uses it to treat any position inside such a
    /// definition as link content.
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
15
16mod md050_config;
17use md050_config::MD050Config;
18
/// Rule MD050: strong emphasis style should be consistent.
///
/// Holds the rule configuration, whose `style` field selects which strong-emphasis
/// delimiter (`**` or `__`) the document is checked against.
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Configuration for this rule instance (the requested strong style).
    config: MD050Config,
}
28
29impl MD050StrongStyle {
30 pub fn new(style: StrongStyle) -> Self {
31 Self {
32 config: MD050Config { style },
33 }
34 }
35
36 pub fn from_config_struct(config: MD050Config) -> Self {
37 Self { config }
38 }
39
40 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
42 for link in &ctx.links {
44 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
45 return true;
46 }
47 }
48
49 for image in &ctx.images {
51 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
52 return true;
53 }
54 }
55
56 for m in REF_DEF_REGEX.find_iter(ctx.content) {
58 if m.start() <= byte_pos && byte_pos < m.end() {
59 return true;
60 }
61 }
62
63 false
64 }
65
66 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
68 for html_tag in ctx.html_tags().iter() {
70 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
73 return true;
74 }
75 }
76 false
77 }
78
79 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
82 let html_tags = ctx.html_tags();
83 let mut open_code_pos: Option<usize> = None;
84
85 for tag in html_tags.iter() {
86 if tag.byte_offset > byte_pos {
88 return open_code_pos.is_some();
89 }
90
91 if tag.tag_name == "code" {
92 if tag.is_self_closing {
93 continue;
95 } else if !tag.is_closing {
96 open_code_pos = Some(tag.byte_end);
98 } else if tag.is_closing && open_code_pos.is_some() {
99 if let Some(open_pos) = open_code_pos
101 && byte_pos >= open_pos
102 && byte_pos < tag.byte_offset
103 {
104 return true;
106 }
107 open_code_pos = None;
108 }
109 }
110 }
111
112 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
114 }
115
116 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
117 let content = ctx.content;
118
119 let mut first_asterisk = None;
121 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
122 let (line_num, _) = ctx.offset_to_line_col(m.start());
124 let in_front_matter = ctx
125 .line_info(line_num)
126 .map(|info| info.in_front_matter)
127 .unwrap_or(false);
128
129 if !in_front_matter
130 && !ctx.is_in_code_block_or_span(m.start())
131 && !self.is_in_link(ctx, m.start())
132 && !self.is_in_html_tag(ctx, m.start())
133 && !self.is_in_html_code_content(ctx, m.start())
134 {
135 first_asterisk = Some(m);
136 break;
137 }
138 }
139
140 let mut first_underscore = None;
141 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
142 let (line_num, _) = ctx.offset_to_line_col(m.start());
144 let in_front_matter = ctx
145 .line_info(line_num)
146 .map(|info| info.in_front_matter)
147 .unwrap_or(false);
148
149 if !in_front_matter
150 && !ctx.is_in_code_block_or_span(m.start())
151 && !self.is_in_link(ctx, m.start())
152 && !self.is_in_html_tag(ctx, m.start())
153 && !self.is_in_html_code_content(ctx, m.start())
154 {
155 first_underscore = Some(m);
156 break;
157 }
158 }
159
160 match (first_asterisk, first_underscore) {
161 (Some(a), Some(u)) => {
162 if a.start() < u.start() {
164 Some(StrongStyle::Asterisk)
165 } else {
166 Some(StrongStyle::Underscore)
167 }
168 }
169 (Some(_), None) => Some(StrongStyle::Asterisk),
170 (None, Some(_)) => Some(StrongStyle::Underscore),
171 (None, None) => None,
172 }
173 }
174
175 fn is_escaped(&self, text: &str, pos: usize) -> bool {
176 if pos == 0 {
177 return false;
178 }
179
180 let mut backslash_count = 0;
181 let mut i = pos;
182 let bytes = text.as_bytes();
183 while i > 0 {
184 i -= 1;
185 if i < bytes.len() && bytes[i] != b'\\' {
187 break;
188 }
189 backslash_count += 1;
190 }
191 backslash_count % 2 == 1
192 }
193}
194
195impl Rule for MD050StrongStyle {
196 fn name(&self) -> &'static str {
197 "MD050"
198 }
199
200 fn description(&self) -> &'static str {
201 "Strong emphasis style should be consistent"
202 }
203
204 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
205 let content = ctx.content;
206 let _line_index = LineIndex::new(content.to_string());
207
208 let mut warnings = Vec::new();
209
210 let target_style = match self.config.style {
211 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
212 _ => self.config.style,
213 };
214
215 let strong_regex = match target_style {
216 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
217 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
218 StrongStyle::Consistent => {
219 &*BOLD_UNDERSCORE_REGEX
222 }
223 };
224
225 let mut byte_pos = 0;
227
228 for (line_num, line) in content.lines().enumerate() {
229 if let Some(line_info) = ctx.line_info(line_num + 1)
231 && line_info.in_front_matter
232 {
233 byte_pos += line.len() + 1; continue;
235 }
236
237 for m in strong_regex.find_iter(line) {
238 let match_byte_pos = byte_pos + m.start();
240
241 if ctx.is_in_code_block_or_span(match_byte_pos)
243 || self.is_in_link(ctx, match_byte_pos)
244 || self.is_in_html_code_content(ctx, match_byte_pos)
245 {
246 continue;
247 }
248
249 let mut inside_html_tag = false;
252 for tag in ctx.html_tags().iter() {
253 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
255 inside_html_tag = true;
256 break;
257 }
258 }
259 if inside_html_tag {
260 continue;
261 }
262
263 if !self.is_escaped(line, m.start()) {
264 let text = &line[m.start() + 2..m.end() - 2];
265 let message = match target_style {
266 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
267 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
268 StrongStyle::Consistent => {
269 "Strong emphasis should use ** instead of __"
272 }
273 };
274
275 let (start_line, start_col, end_line, end_col) =
277 calculate_match_range(line_num + 1, line, m.start(), m.len());
278
279 warnings.push(LintWarning {
280 rule_name: Some(self.name()),
281 line: start_line,
282 column: start_col,
283 end_line,
284 end_column: end_col,
285 message: message.to_string(),
286 severity: Severity::Warning,
287 fix: Some(Fix {
288 range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
289 replacement: match target_style {
290 StrongStyle::Asterisk => format!("**{text}**"),
291 StrongStyle::Underscore => format!("__{text}__"),
292 StrongStyle::Consistent => {
293 format!("**{text}**")
296 }
297 },
298 }),
299 });
300 }
301 }
302
303 byte_pos += line.len() + 1; }
306
307 Ok(warnings)
308 }
309
310 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
311 let content = ctx.content;
312
313 let target_style = match self.config.style {
314 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
315 _ => self.config.style,
316 };
317
318 let strong_regex = match target_style {
319 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
320 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
321 StrongStyle::Consistent => {
322 &*BOLD_UNDERSCORE_REGEX
325 }
326 };
327
328 let matches: Vec<(usize, usize)> = strong_regex
331 .find_iter(content)
332 .filter(|m| {
333 let (line_num, _) = ctx.offset_to_line_col(m.start());
335 if let Some(line_info) = ctx.line_info(line_num)
336 && line_info.in_front_matter
337 {
338 return false;
339 }
340 !ctx.is_in_code_block_or_span(m.start())
341 && !self.is_in_link(ctx, m.start())
342 && !self.is_in_html_tag(ctx, m.start())
343 && !self.is_in_html_code_content(ctx, m.start())
344 })
345 .filter(|m| !self.is_escaped(content, m.start()))
346 .map(|m| (m.start(), m.end()))
347 .collect();
348
349 let mut result = content.to_string();
352 for (start, end) in matches.into_iter().rev() {
353 let text = &result[start + 2..end - 2];
354 let replacement = match target_style {
355 StrongStyle::Asterisk => format!("**{text}**"),
356 StrongStyle::Underscore => format!("__{text}__"),
357 StrongStyle::Consistent => {
358 format!("**{text}**")
361 }
362 };
363 result.replace_range(start..end, &replacement);
364 }
365
366 Ok(result)
367 }
368
369 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
371 ctx.content.is_empty() || (!ctx.content.contains("**") && !ctx.content.contains("__"))
372 }
373
374 fn as_any(&self) -> &dyn std::any::Any {
375 self
376 }
377
378 fn default_config_section(&self) -> Option<(String, toml::Value)> {
379 let json_value = serde_json::to_value(&self.config).ok()?;
380 Some((
381 self.name().to_string(),
382 crate::rule_config_serde::json_to_toml_value(&json_value)?,
383 ))
384 }
385
386 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
387 where
388 Self: Sized,
389 {
390 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
391 Box::new(Self::from_config_struct(rule_config))
392 }
393}
394
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message emitted when `__` is found but `**` is the target style.
    const WANT_ASTERISK: &str = "Strong emphasis should use ** instead of __";
    /// Message emitted when `**` is found but `__` is the target style.
    const WANT_UNDERSCORE: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` for a rule configured with `style` over `content`.
    fn warnings_for(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).check(&ctx).unwrap()
    }

    /// Runs `fix` for a rule configured with `style` over `content`.
    fn fixed_with(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).fix(&ctx).unwrap()
    }

    /// Runs `detect_style` on `content` with a `Consistent` rule.
    fn detected_style(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(StrongStyle::Consistent).detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let found = warnings_for(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let found = warnings_for(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_ASTERISK));
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let found = warnings_for(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let found = warnings_for(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_UNDERSCORE));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let found = warnings_for(StrongStyle::Consistent, "First **strong** then __also strong__.");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_ASTERISK));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let found = warnings_for(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_UNDERSCORE));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(detected_style("This has **strong** text."), Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(detected_style("This has __strong__ text."), Some(StrongStyle::Underscore));
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detected_style("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let found = warnings_for(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let found = warnings_for(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let found = warnings_for(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let output = fixed_with(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(output, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let output = fixed_with(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(output, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let output = fixed_with(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(output, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let output = fixed_with(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(output, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let found = warnings_for(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let found = warnings_for(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let found = warnings_for(StrongStyle::Asterisk, "");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (section_name, _section) = rule.default_config_section().unwrap();
        assert_eq!(section_name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        // Double underscores appear only in code spans, link text/URLs, and a reference
        // definition, so nothing should be reported.
        let markdown = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let found = warnings_for(StrongStyle::Asterisk, markdown);

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        // Only the plain-prose `__real strong text__` should be reported.
        let markdown = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let found = warnings_for(StrongStyle::Asterisk, markdown);

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_ASTERISK));
        assert!(found[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let found = warnings_for(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 6);
        assert!(found[0].message.contains(WANT_ASTERISK));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        // Underscores inside the iframe's attribute value must be ignored.
        let markdown = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let found = warnings_for(StrongStyle::Asterisk, markdown);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 7);
        assert!(found[0].message.contains(WANT_ASTERISK));
    }
}