1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use lazy_static::lazy_static;
7use regex::Regex;
8
9lazy_static! {
10 static ref REF_DEF_REGEX: Regex = Regex::new(
12 r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
13 ).unwrap();
14}
15
16mod md050_config;
17use md050_config::MD050Config;
18
19#[derive(Debug, Default, Clone)]
25pub struct MD050StrongStyle {
26 config: MD050Config,
27}
28
29impl MD050StrongStyle {
30 pub fn new(style: StrongStyle) -> Self {
31 Self {
32 config: MD050Config { style },
33 }
34 }
35
36 pub fn from_config_struct(config: MD050Config) -> Self {
37 Self { config }
38 }
39
40 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
42 for link in &ctx.links {
44 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
45 return true;
46 }
47 }
48
49 for image in &ctx.images {
51 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
52 return true;
53 }
54 }
55
56 for m in REF_DEF_REGEX.find_iter(ctx.content) {
58 if m.start() <= byte_pos && byte_pos < m.end() {
59 return true;
60 }
61 }
62
63 false
64 }
65
66 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
68 for html_tag in ctx.html_tags().iter() {
70 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
71 return true;
72 }
73 }
74 false
75 }
76
77 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
78 let content = ctx.content;
79
80 let mut first_asterisk = None;
82 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
83 let (line_num, _) = ctx.offset_to_line_col(m.start());
85 let in_front_matter = ctx
86 .line_info(line_num)
87 .map(|info| info.in_front_matter)
88 .unwrap_or(false);
89
90 if !in_front_matter
91 && !ctx.is_in_code_block_or_span(m.start())
92 && !self.is_in_link(ctx, m.start())
93 && !self.is_in_html_tag(ctx, m.start())
94 {
95 first_asterisk = Some(m);
96 break;
97 }
98 }
99
100 let mut first_underscore = None;
101 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
102 let (line_num, _) = ctx.offset_to_line_col(m.start());
104 let in_front_matter = ctx
105 .line_info(line_num)
106 .map(|info| info.in_front_matter)
107 .unwrap_or(false);
108
109 if !in_front_matter
110 && !ctx.is_in_code_block_or_span(m.start())
111 && !self.is_in_link(ctx, m.start())
112 && !self.is_in_html_tag(ctx, m.start())
113 {
114 first_underscore = Some(m);
115 break;
116 }
117 }
118
119 match (first_asterisk, first_underscore) {
120 (Some(a), Some(u)) => {
121 if a.start() < u.start() {
123 Some(StrongStyle::Asterisk)
124 } else {
125 Some(StrongStyle::Underscore)
126 }
127 }
128 (Some(_), None) => Some(StrongStyle::Asterisk),
129 (None, Some(_)) => Some(StrongStyle::Underscore),
130 (None, None) => None,
131 }
132 }
133
134 fn is_escaped(&self, text: &str, pos: usize) -> bool {
135 if pos == 0 {
136 return false;
137 }
138
139 let mut backslash_count = 0;
140 let mut i = pos;
141 while i > 0 {
142 i -= 1;
143 let c = text.chars().nth(i).unwrap_or(' ');
144 if c != '\\' {
145 break;
146 }
147 backslash_count += 1;
148 }
149 backslash_count % 2 == 1
150 }
151}
152
153impl Rule for MD050StrongStyle {
154 fn name(&self) -> &'static str {
155 "MD050"
156 }
157
158 fn description(&self) -> &'static str {
159 "Strong emphasis style should be consistent"
160 }
161
162 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
163 let content = ctx.content;
164 let _line_index = LineIndex::new(content.to_string());
165
166 let mut warnings = Vec::new();
167
168 let target_style = match self.config.style {
169 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
170 _ => self.config.style,
171 };
172
173 let strong_regex = match target_style {
174 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
175 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
176 StrongStyle::Consistent => {
177 &*BOLD_UNDERSCORE_REGEX
180 }
181 };
182
183 let mut byte_pos = 0;
185
186 for (line_num, line) in content.lines().enumerate() {
187 if let Some(line_info) = ctx.line_info(line_num + 1)
189 && line_info.in_front_matter
190 {
191 byte_pos += line.len() + 1; continue;
193 }
194
195 for m in strong_regex.find_iter(line) {
196 let match_byte_pos = byte_pos + m.start();
198
199 if ctx.is_in_code_block_or_span(match_byte_pos)
201 || self.is_in_link(ctx, match_byte_pos)
202 || self.is_in_html_tag(ctx, match_byte_pos)
203 {
204 continue;
205 }
206
207 if !self.is_escaped(line, m.start()) {
208 let text = &line[m.start() + 2..m.end() - 2];
209 let message = match target_style {
210 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
211 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
212 StrongStyle::Consistent => {
213 "Strong emphasis should use ** instead of __"
216 }
217 };
218
219 let (start_line, start_col, end_line, end_col) =
221 calculate_match_range(line_num + 1, line, m.start(), m.len());
222
223 warnings.push(LintWarning {
224 rule_name: Some(self.name()),
225 line: start_line,
226 column: start_col,
227 end_line,
228 end_column: end_col,
229 message: message.to_string(),
230 severity: Severity::Warning,
231 fix: Some(Fix {
232 range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
233 replacement: match target_style {
234 StrongStyle::Asterisk => format!("**{text}**"),
235 StrongStyle::Underscore => format!("__{text}__"),
236 StrongStyle::Consistent => {
237 format!("**{text}**")
240 }
241 },
242 }),
243 });
244 }
245 }
246
247 byte_pos += line.len() + 1; }
250
251 Ok(warnings)
252 }
253
254 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
255 let content = ctx.content;
256
257 let target_style = match self.config.style {
258 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
259 _ => self.config.style,
260 };
261
262 let strong_regex = match target_style {
263 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
264 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
265 StrongStyle::Consistent => {
266 &*BOLD_UNDERSCORE_REGEX
269 }
270 };
271
272 let matches: Vec<(usize, usize)> = strong_regex
275 .find_iter(content)
276 .filter(|m| {
277 let (line_num, _) = ctx.offset_to_line_col(m.start());
279 if let Some(line_info) = ctx.line_info(line_num)
280 && line_info.in_front_matter
281 {
282 return false;
283 }
284 !ctx.is_in_code_block_or_span(m.start())
285 && !self.is_in_link(ctx, m.start())
286 && !self.is_in_html_tag(ctx, m.start())
287 })
288 .filter(|m| !self.is_escaped(content, m.start()))
289 .map(|m| (m.start(), m.end()))
290 .collect();
291
292 let mut result = content.to_string();
295 for (start, end) in matches.into_iter().rev() {
296 let text = &result[start + 2..end - 2];
297 let replacement = match target_style {
298 StrongStyle::Asterisk => format!("**{text}**"),
299 StrongStyle::Underscore => format!("__{text}__"),
300 StrongStyle::Consistent => {
301 format!("**{text}**")
304 }
305 };
306 result.replace_range(start..end, &replacement);
307 }
308
309 Ok(result)
310 }
311
312 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
314 ctx.content.is_empty() || (!ctx.content.contains("**") && !ctx.content.contains("__"))
315 }
316
317 fn as_any(&self) -> &dyn std::any::Any {
318 self
319 }
320
321 fn default_config_section(&self) -> Option<(String, toml::Value)> {
322 let json_value = serde_json::to_value(&self.config).ok()?;
323 Some((
324 self.name().to_string(),
325 crate::rule_config_serde::json_to_toml_value(&json_value)?,
326 ))
327 }
328
329 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
330 where
331 Self: Sized,
332 {
333 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
334 Box::new(Self::from_config_struct(rule_config))
335 }
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message expected when the target style is asterisks.
    const WANT_ASTERISK: &str = "Strong emphasis should use ** instead of __";
    /// Message expected when the target style is underscores.
    const WANT_UNDERSCORE: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` for `style` over `content` and returns the warnings.
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).check(&ctx).unwrap()
    }

    /// Runs `fix` for `style` over `content` and returns the fixed text.
    fn apply_fix(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).fix(&ctx).unwrap()
    }

    /// Runs style detection over `content` with a `Consistent` rule.
    fn detected(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(StrongStyle::Consistent).detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert!(first.message.contains(WANT_ASTERISK));
        assert_eq!(first.line, 1);
        assert_eq!(first.column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_UNDERSCORE));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = lint(StrongStyle::Consistent, "First **strong** then __also strong__.");

        // The first style seen is asterisk, so the underscore span is flagged.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISK));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let warnings = lint(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        // The first style seen is underscore, so the asterisk span is flagged.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_UNDERSCORE));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(detected("This has **strong** text."), Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(detected("This has __strong__ text."), Some(StrongStyle::Underscore));
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detected("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        // Only the span outside the fenced block is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        // Only the span outside the code span is reported.
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        // Only the unescaped span is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = apply_fix(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = apply_fix(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = apply_fix(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = lint(StrongStyle::Asterisk, "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Underscores inside links and reference definitions are ignored.
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Only the real strong text outside links is reported.
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(WANT_ASTERISK));
        assert!(warnings[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        // Front matter is skipped; only the body line is reported.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6);
        assert!(warnings[0].message.contains(WANT_ASTERISK));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = lint(StrongStyle::Asterisk, content);

        // Underscores inside the HTML tag are ignored.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
        assert!(warnings[0].message.contains(WANT_ASTERISK));
    }
}