1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use crate::utils::regex_cache::get_cached_regex;
7
/// Pattern (multi-line mode, anchored at line boundaries) for a reference-style
/// link definition: up to three leading spaces, a `[label]:` prefix, a
/// destination made of non-whitespace characters, and an optional title wrapped
/// in double or single quotes.
const REF_DEF_REGEX_STR: &str = r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#;
10
11mod md050_config;
12use md050_config::MD050Config;
13
/// Rule MD050: strong emphasis style should be consistent.
///
/// The wrapped [`MD050Config`] carries the [`StrongStyle`] the rule enforces.
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Rule configuration; its `style` field selects the target delimiter style.
    config: MD050Config,
}
23
24impl MD050StrongStyle {
25 pub fn new(style: StrongStyle) -> Self {
26 Self {
27 config: MD050Config { style },
28 }
29 }
30
    /// Builds the rule from an already-constructed [`MD050Config`].
    pub fn from_config_struct(config: MD050Config) -> Self {
        Self { config }
    }
34
35 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37 for link in &ctx.links {
39 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40 return true;
41 }
42 }
43
44 for image in &ctx.images {
46 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47 return true;
48 }
49 }
50
51 if let Ok(re) = get_cached_regex(REF_DEF_REGEX_STR) {
53 for m in re.find_iter(ctx.content) {
54 if m.start() <= byte_pos && byte_pos < m.end() {
55 return true;
56 }
57 }
58 }
59
60 false
61 }
62
63 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65 for html_tag in ctx.html_tags().iter() {
67 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
70 return true;
71 }
72 }
73 false
74 }
75
76 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
79 let html_tags = ctx.html_tags();
80 let mut open_code_pos: Option<usize> = None;
81
82 for tag in html_tags.iter() {
83 if tag.byte_offset > byte_pos {
85 return open_code_pos.is_some();
86 }
87
88 if tag.tag_name == "code" {
89 if tag.is_self_closing {
90 continue;
92 } else if !tag.is_closing {
93 open_code_pos = Some(tag.byte_end);
95 } else if tag.is_closing && open_code_pos.is_some() {
96 if let Some(open_pos) = open_code_pos
98 && byte_pos >= open_pos
99 && byte_pos < tag.byte_offset
100 {
101 return true;
103 }
104 open_code_pos = None;
105 }
106 }
107 }
108
109 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
111 }
112
113 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
114 let content = ctx.content;
115
116 let mut first_asterisk = None;
118 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
119 let (line_num, _) = ctx.offset_to_line_col(m.start());
121 let in_front_matter = ctx
122 .line_info(line_num)
123 .map(|info| info.in_front_matter)
124 .unwrap_or(false);
125
126 if !in_front_matter
127 && !ctx.is_in_code_block_or_span(m.start())
128 && !self.is_in_link(ctx, m.start())
129 && !self.is_in_html_tag(ctx, m.start())
130 && !self.is_in_html_code_content(ctx, m.start())
131 {
132 first_asterisk = Some(m);
133 break;
134 }
135 }
136
137 let mut first_underscore = None;
138 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
139 let (line_num, _) = ctx.offset_to_line_col(m.start());
141 let in_front_matter = ctx
142 .line_info(line_num)
143 .map(|info| info.in_front_matter)
144 .unwrap_or(false);
145
146 if !in_front_matter
147 && !ctx.is_in_code_block_or_span(m.start())
148 && !self.is_in_link(ctx, m.start())
149 && !self.is_in_html_tag(ctx, m.start())
150 && !self.is_in_html_code_content(ctx, m.start())
151 {
152 first_underscore = Some(m);
153 break;
154 }
155 }
156
157 match (first_asterisk, first_underscore) {
158 (Some(a), Some(u)) => {
159 if a.start() < u.start() {
161 Some(StrongStyle::Asterisk)
162 } else {
163 Some(StrongStyle::Underscore)
164 }
165 }
166 (Some(_), None) => Some(StrongStyle::Asterisk),
167 (None, Some(_)) => Some(StrongStyle::Underscore),
168 (None, None) => None,
169 }
170 }
171
172 fn is_escaped(&self, text: &str, pos: usize) -> bool {
173 if pos == 0 {
174 return false;
175 }
176
177 let mut backslash_count = 0;
178 let mut i = pos;
179 let bytes = text.as_bytes();
180 while i > 0 {
181 i -= 1;
182 if i < bytes.len() && bytes[i] != b'\\' {
184 break;
185 }
186 backslash_count += 1;
187 }
188 backslash_count % 2 == 1
189 }
190}
191
192impl Rule for MD050StrongStyle {
    /// Returns the rule identifier, `"MD050"`.
    fn name(&self) -> &'static str {
        "MD050"
    }
196
    /// Returns the one-line description of the rule.
    fn description(&self) -> &'static str {
        "Strong emphasis style should be consistent"
    }
200
201 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
202 let content = ctx.content;
203 let line_index = LineIndex::new(content.to_string());
204
205 let mut warnings = Vec::new();
206
207 let target_style = match self.config.style {
208 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
209 _ => self.config.style,
210 };
211
212 let strong_regex = match target_style {
213 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
214 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
215 StrongStyle::Consistent => {
216 &*BOLD_UNDERSCORE_REGEX
219 }
220 };
221
222 for (line_num, line) in content.lines().enumerate() {
223 if let Some(line_info) = ctx.line_info(line_num + 1)
225 && line_info.in_front_matter
226 {
227 continue;
228 }
229
230 let byte_pos = line_index.get_line_start_byte(line_num + 1).unwrap_or(0);
231
232 for m in strong_regex.find_iter(line) {
233 let match_byte_pos = byte_pos + m.start();
235
236 if ctx.is_in_code_block_or_span(match_byte_pos)
238 || self.is_in_link(ctx, match_byte_pos)
239 || self.is_in_html_code_content(ctx, match_byte_pos)
240 {
241 continue;
242 }
243
244 let mut inside_html_tag = false;
247 for tag in ctx.html_tags().iter() {
248 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
250 inside_html_tag = true;
251 break;
252 }
253 }
254 if inside_html_tag {
255 continue;
256 }
257
258 if !self.is_escaped(line, m.start()) {
259 let text = &line[m.start() + 2..m.end() - 2];
260 let message = match target_style {
261 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
262 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
263 StrongStyle::Consistent => {
264 "Strong emphasis should use ** instead of __"
267 }
268 };
269
270 let (start_line, start_col, end_line, end_col) =
272 calculate_match_range(line_num + 1, line, m.start(), m.len());
273
274 warnings.push(LintWarning {
275 rule_name: Some(self.name()),
276 line: start_line,
277 column: start_col,
278 end_line,
279 end_column: end_col,
280 message: message.to_string(),
281 severity: Severity::Warning,
282 fix: Some(Fix {
283 range: line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
284 replacement: match target_style {
285 StrongStyle::Asterisk => format!("**{text}**"),
286 StrongStyle::Underscore => format!("__{text}__"),
287 StrongStyle::Consistent => {
288 format!("**{text}**")
291 }
292 },
293 }),
294 });
295 }
296 }
297 }
298
299 Ok(warnings)
300 }
301
302 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
303 let content = ctx.content;
304
305 let target_style = match self.config.style {
306 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
307 _ => self.config.style,
308 };
309
310 let strong_regex = match target_style {
311 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
312 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
313 StrongStyle::Consistent => {
314 &*BOLD_UNDERSCORE_REGEX
317 }
318 };
319
320 let matches: Vec<(usize, usize)> = strong_regex
323 .find_iter(content)
324 .filter(|m| {
325 let (line_num, _) = ctx.offset_to_line_col(m.start());
327 if let Some(line_info) = ctx.line_info(line_num)
328 && line_info.in_front_matter
329 {
330 return false;
331 }
332 !ctx.is_in_code_block_or_span(m.start())
333 && !self.is_in_link(ctx, m.start())
334 && !self.is_in_html_tag(ctx, m.start())
335 && !self.is_in_html_code_content(ctx, m.start())
336 })
337 .filter(|m| !self.is_escaped(content, m.start()))
338 .map(|m| (m.start(), m.end()))
339 .collect();
340
341 let mut result = content.to_string();
344 for (start, end) in matches.into_iter().rev() {
345 let text = &result[start + 2..end - 2];
346 let replacement = match target_style {
347 StrongStyle::Asterisk => format!("**{text}**"),
348 StrongStyle::Underscore => format!("__{text}__"),
349 StrongStyle::Consistent => {
350 format!("**{text}**")
353 }
354 };
355 result.replace_range(start..end, &replacement);
356 }
357
358 Ok(result)
359 }
360
361 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
363 ctx.content.is_empty() || !ctx.likely_has_emphasis()
365 }
366
    /// Returns this rule as a `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
370
371 fn default_config_section(&self) -> Option<(String, toml::Value)> {
372 let json_value = serde_json::to_value(&self.config).ok()?;
373 Some((
374 self.name().to_string(),
375 crate::rule_config_serde::json_to_toml_value(&json_value)?,
376 ))
377 }
378
379 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
380 where
381 Self: Sized,
382 {
383 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
384 Box::new(Self::from_config_struct(rule_config))
385 }
386}
387
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message emitted when `__` is found while `**` is the target style.
    const USE_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    /// Message emitted when `**` is found while `__` is the target style.
    const USE_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` on `content` with a rule configured for `style`.
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(style);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` with a rule configured for `style`.
    fn autofix(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(style);
        rule.fix(&ctx).unwrap()
    }

    /// Runs `detect_style` on `content` with a `Consistent` rule.
    fn detected(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        rule.detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert!(first.message.contains(USE_ASTERISKS));
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = lint(StrongStyle::Consistent, "First **strong** then __also strong__.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let warnings = lint(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(detected("This has **strong** text."), Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(detected("This has __strong__ text."), Some(StrongStyle::Underscore));
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detected("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = autofix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = autofix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = autofix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = autofix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        let lines: Vec<_> = warnings.iter().map(|w| w.line).collect();
        assert_eq!(lines, vec![1, 2]);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(StrongStyle::Asterisk, "");

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (section_name, _value) = rule.default_config_section().unwrap();

        assert_eq!(section_name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert!(warnings.is_empty());
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert!(first.message.contains(USE_ASTERISKS));
        assert!(first.line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 6);
        assert!(first.message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = lint(StrongStyle::Asterisk, content);

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 7);
        assert!(first.message.contains(USE_ASTERISKS));
    }
}