1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use lazy_static::lazy_static;
7use regex::Regex;
8
lazy_static! {
    // Matches a link reference definition such as `[label]: destination "title"`.
    // In multi-line mode the pattern is anchored at a line start and a line end and
    // accepts: up to three leading spaces, a bracketed non-empty label followed by
    // `:`, optional whitespace, a destination made of non-whitespace characters,
    // and an optional title wrapped in double or single quotes.
    // Capture groups: 1 = label, 2 = destination, 3 = double-quoted title,
    // 4 = single-quoted title.
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
15
16mod md050_config;
17use md050_config::MD050Config;
18
/// Lint rule MD050: strong emphasis style should be consistent.
///
/// The rule compares strong-emphasis delimiters (`**` versus `__`) against the
/// style stored in its configuration.
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Rule configuration; carries the `StrongStyle` to enforce.
    config: MD050Config,
}
28
29impl MD050StrongStyle {
30 pub fn new(style: StrongStyle) -> Self {
31 Self {
32 config: MD050Config { style },
33 }
34 }
35
36 pub fn from_config_struct(config: MD050Config) -> Self {
37 Self { config }
38 }
39
40 fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
42 for link in &ctx.links {
44 if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
45 return true;
46 }
47 }
48
49 for image in &ctx.images {
51 if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
52 return true;
53 }
54 }
55
56 for m in REF_DEF_REGEX.find_iter(ctx.content) {
58 if m.start() <= byte_pos && byte_pos < m.end() {
59 return true;
60 }
61 }
62
63 false
64 }
65
66 fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
68 for html_tag in ctx.html_tags().iter() {
70 if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
71 return true;
72 }
73 }
74 false
75 }
76
77 fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
78 let content = ctx.content;
79
80 let mut first_asterisk = None;
82 for m in BOLD_ASTERISK_REGEX.find_iter(content) {
83 let (line_num, _) = ctx.offset_to_line_col(m.start());
85 let in_front_matter = ctx
86 .line_info(line_num)
87 .map(|info| info.in_front_matter)
88 .unwrap_or(false);
89
90 if !in_front_matter
91 && !ctx.is_in_code_block_or_span(m.start())
92 && !self.is_in_link(ctx, m.start())
93 && !self.is_in_html_tag(ctx, m.start())
94 {
95 first_asterisk = Some(m);
96 break;
97 }
98 }
99
100 let mut first_underscore = None;
101 for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
102 let (line_num, _) = ctx.offset_to_line_col(m.start());
104 let in_front_matter = ctx
105 .line_info(line_num)
106 .map(|info| info.in_front_matter)
107 .unwrap_or(false);
108
109 if !in_front_matter
110 && !ctx.is_in_code_block_or_span(m.start())
111 && !self.is_in_link(ctx, m.start())
112 && !self.is_in_html_tag(ctx, m.start())
113 {
114 first_underscore = Some(m);
115 break;
116 }
117 }
118
119 match (first_asterisk, first_underscore) {
120 (Some(a), Some(u)) => {
121 if a.start() < u.start() {
123 Some(StrongStyle::Asterisk)
124 } else {
125 Some(StrongStyle::Underscore)
126 }
127 }
128 (Some(_), None) => Some(StrongStyle::Asterisk),
129 (None, Some(_)) => Some(StrongStyle::Underscore),
130 (None, None) => None,
131 }
132 }
133
134 fn is_escaped(&self, text: &str, pos: usize) -> bool {
135 if pos == 0 {
136 return false;
137 }
138
139 let mut backslash_count = 0;
140 let mut i = pos;
141 let bytes = text.as_bytes();
142 while i > 0 {
143 i -= 1;
144 if i < bytes.len() && bytes[i] != b'\\' {
146 break;
147 }
148 backslash_count += 1;
149 }
150 backslash_count % 2 == 1
151 }
152}
153
154impl Rule for MD050StrongStyle {
155 fn name(&self) -> &'static str {
156 "MD050"
157 }
158
159 fn description(&self) -> &'static str {
160 "Strong emphasis style should be consistent"
161 }
162
163 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
164 let content = ctx.content;
165 let _line_index = LineIndex::new(content.to_string());
166
167 let mut warnings = Vec::new();
168
169 let target_style = match self.config.style {
170 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
171 _ => self.config.style,
172 };
173
174 let strong_regex = match target_style {
175 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
176 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
177 StrongStyle::Consistent => {
178 &*BOLD_UNDERSCORE_REGEX
181 }
182 };
183
184 let mut byte_pos = 0;
186
187 for (line_num, line) in content.lines().enumerate() {
188 if let Some(line_info) = ctx.line_info(line_num + 1)
190 && line_info.in_front_matter
191 {
192 byte_pos += line.len() + 1; continue;
194 }
195
196 for m in strong_regex.find_iter(line) {
197 let match_byte_pos = byte_pos + m.start();
199
200 if ctx.is_in_code_block_or_span(match_byte_pos)
202 || self.is_in_link(ctx, match_byte_pos)
203 || self.is_in_html_tag(ctx, match_byte_pos)
204 {
205 continue;
206 }
207
208 if !self.is_escaped(line, m.start()) {
209 let text = &line[m.start() + 2..m.end() - 2];
210 let message = match target_style {
211 StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
212 StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
213 StrongStyle::Consistent => {
214 "Strong emphasis should use ** instead of __"
217 }
218 };
219
220 let (start_line, start_col, end_line, end_col) =
222 calculate_match_range(line_num + 1, line, m.start(), m.len());
223
224 warnings.push(LintWarning {
225 rule_name: Some(self.name()),
226 line: start_line,
227 column: start_col,
228 end_line,
229 end_column: end_col,
230 message: message.to_string(),
231 severity: Severity::Warning,
232 fix: Some(Fix {
233 range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
234 replacement: match target_style {
235 StrongStyle::Asterisk => format!("**{text}**"),
236 StrongStyle::Underscore => format!("__{text}__"),
237 StrongStyle::Consistent => {
238 format!("**{text}**")
241 }
242 },
243 }),
244 });
245 }
246 }
247
248 byte_pos += line.len() + 1; }
251
252 Ok(warnings)
253 }
254
255 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
256 let content = ctx.content;
257
258 let target_style = match self.config.style {
259 StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
260 _ => self.config.style,
261 };
262
263 let strong_regex = match target_style {
264 StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
265 StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
266 StrongStyle::Consistent => {
267 &*BOLD_UNDERSCORE_REGEX
270 }
271 };
272
273 let matches: Vec<(usize, usize)> = strong_regex
276 .find_iter(content)
277 .filter(|m| {
278 let (line_num, _) = ctx.offset_to_line_col(m.start());
280 if let Some(line_info) = ctx.line_info(line_num)
281 && line_info.in_front_matter
282 {
283 return false;
284 }
285 !ctx.is_in_code_block_or_span(m.start())
286 && !self.is_in_link(ctx, m.start())
287 && !self.is_in_html_tag(ctx, m.start())
288 })
289 .filter(|m| !self.is_escaped(content, m.start()))
290 .map(|m| (m.start(), m.end()))
291 .collect();
292
293 let mut result = content.to_string();
296 for (start, end) in matches.into_iter().rev() {
297 let text = &result[start + 2..end - 2];
298 let replacement = match target_style {
299 StrongStyle::Asterisk => format!("**{text}**"),
300 StrongStyle::Underscore => format!("__{text}__"),
301 StrongStyle::Consistent => {
302 format!("**{text}**")
305 }
306 };
307 result.replace_range(start..end, &replacement);
308 }
309
310 Ok(result)
311 }
312
313 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
315 ctx.content.is_empty() || (!ctx.content.contains("**") && !ctx.content.contains("__"))
316 }
317
318 fn as_any(&self) -> &dyn std::any::Any {
319 self
320 }
321
322 fn default_config_section(&self) -> Option<(String, toml::Value)> {
323 let json_value = serde_json::to_value(&self.config).ok()?;
324 Some((
325 self.name().to_string(),
326 crate::rule_config_serde::json_to_toml_value(&json_value)?,
327 ))
328 }
329
330 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
331 where
332 Self: Sized,
333 {
334 let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
335 Box::new(Self::from_config_struct(rule_config))
336 }
337}
338
// Unit tests for MD050: style enforcement, style detection, skipped contexts
// (code, links, front matter, HTML tags), escaping, and auto-fixing.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    // Asterisk style configured, asterisk strong text present: no warnings.
    #[test]
    fn test_asterisk_style_with_asterisks() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "This is **strong text** here.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 0);
    }

    // Asterisk style configured, underscore strong text present: one warning
    // located at line 1, column 9.
    #[test]
    fn test_asterisk_style_with_underscores() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "This is __strong text__ here.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use ** instead of __")
        );
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 9);
    }

    // Underscore style configured, underscore strong text present: no warnings.
    #[test]
    fn test_underscore_style_with_underscores() {
        let rule = MD050StrongStyle::new(StrongStyle::Underscore);
        let content = "This is __strong text__ here.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 0);
    }

    // Underscore style configured, asterisk strong text present: one warning.
    #[test]
    fn test_underscore_style_with_asterisks() {
        let rule = MD050StrongStyle::new(StrongStyle::Underscore);
        let content = "This is **strong text** here.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use __ instead of **")
        );
    }

    // Consistent mode: the first span uses asterisks, so the later underscore
    // span is the one that gets flagged.
    #[test]
    fn test_consistent_style_first_asterisk() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let content = "First **strong** then __also strong__.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use ** instead of __")
        );
    }

    // Consistent mode: the first span uses underscores, so the later asterisk
    // span is the one that gets flagged.
    #[test]
    fn test_consistent_style_first_underscore() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let content = "First __strong__ then **also strong**.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use __ instead of **")
        );
    }

    // Style detection returns the asterisk style for asterisk-only content.
    #[test]
    fn test_detect_style_asterisk() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let ctx = LintContext::new("This has **strong** text.", crate::config::MarkdownFlavor::Standard);
        let style = rule.detect_style(&ctx);

        assert_eq!(style, Some(StrongStyle::Asterisk));
    }

    // Style detection returns the underscore style for underscore-only content.
    #[test]
    fn test_detect_style_underscore() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let ctx = LintContext::new("This has __strong__ text.", crate::config::MarkdownFlavor::Standard);
        let style = rule.detect_style(&ctx);

        assert_eq!(style, Some(StrongStyle::Underscore));
    }

    // Style detection returns `None` when there is no strong text at all.
    #[test]
    fn test_detect_style_none() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let ctx = LintContext::new("No strong text here.", crate::config::MarkdownFlavor::Standard);
        let style = rule.detect_style(&ctx);

        assert_eq!(style, None);
    }

    // Strong text inside a fenced code block is ignored; only the occurrence on
    // line 4, outside the fence, is reported.
    #[test]
    fn test_strong_in_code_block() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "```\n__strong__ in code\n```\n__strong__ outside";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 4);
    }

    // Strong text inside an inline code span is ignored; only the occurrence
    // outside the span is reported.
    #[test]
    fn test_strong_in_inline_code() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "Text with `__strong__` in code and __strong__ outside.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
    }

    // Backslash-escaped delimiters are not reported; the unescaped span at
    // line 1, column 30 is.
    #[test]
    fn test_escaped_strong() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "This is \\__not strong\\__ but __this is__.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[0].column, 30);
    }

    // Fixing with underscore style rewrites `**strong**` to `__strong__`.
    #[test]
    fn test_fix_asterisks_to_underscores() {
        let rule = MD050StrongStyle::new(StrongStyle::Underscore);
        let content = "This is **strong** text.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();

        assert_eq!(fixed, "This is __strong__ text.");
    }

    // Fixing with asterisk style rewrites `__strong__` to `**strong**`.
    #[test]
    fn test_fix_underscores_to_asterisks() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "This is __strong__ text.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();

        assert_eq!(fixed, "This is **strong** text.");
    }

    // Fixing rewrites every offending span on the line, not just the first.
    #[test]
    fn test_fix_multiple_strong() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "First __strong__ and second __also strong__.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    // Fixing leaves the contents of a fenced code block untouched.
    #[test]
    fn test_fix_preserves_code_blocks() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "```\n__strong__ in code\n```\n__strong__ outside";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    // Warnings on separate lines carry the correct line numbers.
    #[test]
    fn test_multiline_content() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 2);
    }

    // A strong span that contains regular emphasis yields a single warning.
    #[test]
    fn test_nested_emphasis() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "This has __strong with *emphasis* inside__.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
    }

    // Empty content produces no warnings.
    #[test]
    fn test_empty_content() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 0);
    }

    // The default config section is keyed by the rule name.
    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    // Double underscores that appear in link text, link URLs, link titles and
    // reference definitions must not be reported.
    #[test]
    fn test_strong_in_links_not_flagged() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 0);
    }

    // Underscores inside links are ignored while a genuine underscore strong
    // span further down the document is still reported.
    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use ** instead of __")
        );
        assert!(result[0].line > 4);
    }

    // Underscores inside YAML front matter are ignored; the span in the body
    // on line 6 is reported.
    #[test]
    fn test_front_matter_not_flagged() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 6);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use ** instead of __")
        );
    }

    // Underscores inside an HTML tag's attributes are ignored; the span in the
    // paragraph on line 7 is reported.
    #[test]
    fn test_html_tags_not_flagged() {
        let rule = MD050StrongStyle::new(StrongStyle::Asterisk);
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let result = rule.check(&ctx).unwrap();

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 7);
        assert!(
            result[0]
                .message
                .contains("Strong emphasis should use ** instead of __")
        );
    }
}