1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use crate::utils::regex_cache::get_cached_regex;
7
/// Pattern for a reference definition line, matched in multi-line mode:
/// up to three leading spaces, a bracketed label, a colon, optional
/// whitespace, a destination made of non-whitespace characters, and an
/// optional title wrapped in double or single quotes, anchored to line end.
const REF_DEF_REGEX_STR: &str = r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#;
10
11mod md050_config;
12use md050_config::MD050Config;
13
14#[derive(Debug, Default, Clone)]
20pub struct MD050StrongStyle {
21 config: MD050Config,
22}
23
24impl MD050StrongStyle {
25 pub fn new(style: StrongStyle) -> Self {
26 Self {
27 config: MD050Config { style },
28 }
29 }
30
31 pub fn from_config_struct(config: MD050Config) -> Self {
32 Self { config }
33 }
34
35 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
37 for link in &ctx.links {
39 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
40 return true;
41 }
42 }
43
44 for image in &ctx.images {
46 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
47 return true;
48 }
49 }
50
51 if let Ok(re) = get_cached_regex(REF_DEF_REGEX_STR) {
53 for m in re.find_iter(ctx.content) {
54 if m.start() <= byte_pos && byte_pos < m.end() {
55 return true;
56 }
57 }
58 }
59
60 false
61 }
62
63 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
65 for html_tag in ctx.html_tags().iter() {
67 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
70 return true;
71 }
72 }
73 false
74 }
75
76 fn is_in_html_code_content(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
79 let html_tags = ctx.html_tags();
80 let mut open_code_pos: Option<usize> = None;
81
82 for tag in html_tags.iter() {
83 if tag.byte_offset > byte_pos {
85 return open_code_pos.is_some();
86 }
87
88 if tag.tag_name == "code" {
89 if tag.is_self_closing {
90 continue;
92 } else if !tag.is_closing {
93 open_code_pos = Some(tag.byte_end);
95 } else if tag.is_closing && open_code_pos.is_some() {
96 if let Some(open_pos) = open_code_pos
98 && byte_pos >= open_pos
99 && byte_pos < tag.byte_offset
100 {
101 return true;
103 }
104 open_code_pos = None;
105 }
106 }
107 }
108
109 open_code_pos.is_some() && byte_pos >= open_code_pos.unwrap()
111 }
112
113 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
114 let content = ctx.content;
115
116 let mut first_asterisk = None;
118 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
119 let (line_num, _) = ctx.offset_to_line_col(m.start());
121 let in_front_matter = ctx
122 .line_info(line_num)
123 .map(|info| info.in_front_matter)
124 .unwrap_or(false);
125
126 if !in_front_matter
127 && !ctx.is_in_code_block_or_span(m.start())
128 && !self.is_in_link(ctx, m.start())
129 && !self.is_in_html_tag(ctx, m.start())
130 && !self.is_in_html_code_content(ctx, m.start())
131 {
132 first_asterisk = Some(m);
133 break;
134 }
135 }
136
137 let mut first_underscore = None;
138 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
139 let (line_num, _) = ctx.offset_to_line_col(m.start());
141 let in_front_matter = ctx
142 .line_info(line_num)
143 .map(|info| info.in_front_matter)
144 .unwrap_or(false);
145
146 if !in_front_matter
147 && !ctx.is_in_code_block_or_span(m.start())
148 && !self.is_in_link(ctx, m.start())
149 && !self.is_in_html_tag(ctx, m.start())
150 && !self.is_in_html_code_content(ctx, m.start())
151 {
152 first_underscore = Some(m);
153 break;
154 }
155 }
156
157 match (first_asterisk, first_underscore) {
158 (Some(a), Some(u)) => {
159 if a.start() < u.start() {
161 Some(StrongStyle::Asterisk)
162 } else {
163 Some(StrongStyle::Underscore)
164 }
165 }
166 (Some(_), None) => Some(StrongStyle::Asterisk),
167 (None, Some(_)) => Some(StrongStyle::Underscore),
168 (None, None) => None,
169 }
170 }
171
172 fn is_escaped(&self, text: &str, pos: usize) -> bool {
173 if pos == 0 {
174 return false;
175 }
176
177 let mut backslash_count = 0;
178 let mut i = pos;
179 let bytes = text.as_bytes();
180 while i > 0 {
181 i -= 1;
182 if i < bytes.len() && bytes[i] != b'\\' {
184 break;
185 }
186 backslash_count += 1;
187 }
188 backslash_count % 2 == 1
189 }
190}
191
192impl Rule for MD050StrongStyle {
193 fn name(&self) -> &'static str {
194 "MD050"
195 }
196
197 fn description(&self) -> &'static str {
198 "Strong emphasis style should be consistent"
199 }
200
201 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
202 let content = ctx.content;
203 let _line_index = LineIndex::new(content.to_string());
204
205 let mut warnings = Vec::new();
206
207 let target_style = match self.config.style {
208 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
209 _ => self.config.style,
210 };
211
212 let strong_regex = match target_style {
213 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
214 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
215 StrongStyle::Consistent => {
216 &*BOLD_UNDERSCORE_REGEX
219 }
220 };
221
222 let mut byte_pos = 0;
224
225 for (line_num, line) in content.lines().enumerate() {
226 if let Some(line_info) = ctx.line_info(line_num + 1)
228 && line_info.in_front_matter
229 {
230 byte_pos += line.len() + 1; continue;
232 }
233
234 for m in strong_regex.find_iter(line) {
235 let match_byte_pos = byte_pos + m.start();
237
238 if ctx.is_in_code_block_or_span(match_byte_pos)
240 || self.is_in_link(ctx, match_byte_pos)
241 || self.is_in_html_code_content(ctx, match_byte_pos)
242 {
243 continue;
244 }
245
246 let mut inside_html_tag = false;
249 for tag in ctx.html_tags().iter() {
250 if tag.byte_offset < match_byte_pos && match_byte_pos < tag.byte_end - 1 {
252 inside_html_tag = true;
253 break;
254 }
255 }
256 if inside_html_tag {
257 continue;
258 }
259
260 if !self.is_escaped(line, m.start()) {
261 let text = &line[m.start() + 2..m.end() - 2];
262 let message = match target_style {
263 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
264 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
265 StrongStyle::Consistent => {
266 "Strong emphasis should use ** instead of __"
269 }
270 };
271
272 let (start_line, start_col, end_line, end_col) =
274 calculate_match_range(line_num + 1, line, m.start(), m.len());
275
276 warnings.push(LintWarning {
277 rule_name: Some(self.name()),
278 line: start_line,
279 column: start_col,
280 end_line,
281 end_column: end_col,
282 message: message.to_string(),
283 severity: Severity::Warning,
284 fix: Some(Fix {
285 range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
286 replacement: match target_style {
287 StrongStyle::Asterisk => format!("**{text}**"),
288 StrongStyle::Underscore => format!("__{text}__"),
289 StrongStyle::Consistent => {
290 format!("**{text}**")
293 }
294 },
295 }),
296 });
297 }
298 }
299
300 byte_pos += line.len() + 1; }
303
304 Ok(warnings)
305 }
306
307 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
308 let content = ctx.content;
309
310 let target_style = match self.config.style {
311 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
312 _ => self.config.style,
313 };
314
315 let strong_regex = match target_style {
316 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
317 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
318 StrongStyle::Consistent => {
319 &*BOLD_UNDERSCORE_REGEX
322 }
323 };
324
325 let matches: Vec<(usize, usize)> = strong_regex
328 .find_iter(content)
329 .filter(|m| {
330 let (line_num, _) = ctx.offset_to_line_col(m.start());
332 if let Some(line_info) = ctx.line_info(line_num)
333 && line_info.in_front_matter
334 {
335 return false;
336 }
337 !ctx.is_in_code_block_or_span(m.start())
338 && !self.is_in_link(ctx, m.start())
339 && !self.is_in_html_tag(ctx, m.start())
340 && !self.is_in_html_code_content(ctx, m.start())
341 })
342 .filter(|m| !self.is_escaped(content, m.start()))
343 .map(|m| (m.start(), m.end()))
344 .collect();
345
346 let mut result = content.to_string();
349 for (start, end) in matches.into_iter().rev() {
350 let text = &result[start + 2..end - 2];
351 let replacement = match target_style {
352 StrongStyle::Asterisk => format!("**{text}**"),
353 StrongStyle::Underscore => format!("__{text}__"),
354 StrongStyle::Consistent => {
355 format!("**{text}**")
358 }
359 };
360 result.replace_range(start..end, &replacement);
361 }
362
363 Ok(result)
364 }
365
366 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
368 ctx.content.is_empty() || (!ctx.content.contains("**") && !ctx.content.contains("__"))
369 }
370
371 fn as_any(&self) -> &dyn std::any::Any {
372 self
373 }
374
375 fn default_config_section(&self) -> Option<(String, toml::Value)> {
376 let json_value = serde_json::to_value(&self.config).ok()?;
377 Some((
378 self.name().to_string(),
379 crate::rule_config_serde::json_to_toml_value(&json_value)?,
380 ))
381 }
382
383 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
384 where
385 Self: Sized,
386 {
387 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
388 Box::new(Self::from_config_struct(rule_config))
389 }
390}
391
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;

    /// Message emitted when underscores are found but asterisks are wanted.
    const WANT_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    /// Message emitted when asterisks are found but underscores are wanted.
    const WANT_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` for `style` over `content` using the standard flavor.
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let rule = MD050StrongStyle::new(style);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` for `style` over `content` using the standard flavor.
    fn apply_fix(style: StrongStyle, content: &str) -> String {
        let rule = MD050StrongStyle::new(style);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    /// Runs `detect_style` over `content` using the standard flavor.
    fn detect(content: &str) -> Option<StrongStyle> {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        rule.detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISKS));
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = lint(
            StrongStyle::Consistent,
            "First **strong** then __also strong__.",
        );

        // The first style seen is asterisks, so the underscore span is flagged.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let warnings = lint(
            StrongStyle::Consistent,
            "First __strong__ then **also strong**.",
        );

        // The first style seen is underscores, so the asterisk span is flagged.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_UNDERSCORES));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(
            detect("This has **strong** text."),
            Some(StrongStyle::Asterisk)
        );
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(
            detect("This has __strong__ text."),
            Some(StrongStyle::Underscore)
        );
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detect("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        // Only the span outside the fenced block is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        // Only the span outside the inline code is reported.
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        // Only the unescaped span is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = apply_fix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = apply_fix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(StrongStyle::Asterisk, "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Underscores inside links and reference definitions are not strong emphasis.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Only the genuine strong span outside any link is reported.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISKS));
        assert!(warnings[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        // Front matter is ignored; only the body span is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6);
        assert!(warnings[0].message.contains(WANT_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Underscores inside the tag's attribute are ignored.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
        assert!(warnings[0].message.contains(WANT_ASTERISKS));
    }
}