rumdl_lib/rules/
md050_strong_style.rs

1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use lazy_static::lazy_static;
7use regex::Regex;
8
lazy_static! {
    // Reference definition pattern - matches `[ref]: url "title"`.
    //
    // Pattern breakdown:
    // - `(?m)` enables multi-line mode so `^`/`$` anchor at every line of the document
    // - up to three leading spaces are allowed before the `[label]`
    // - group 1 is the label, group 2 the destination (a run of non-whitespace)
    // - an optional title may follow in double quotes (group 3) or single quotes (group 4)
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
15
16mod md050_config;
17use md050_config::MD050Config;
18
/// Rule MD050: Strong style
///
/// See [docs/md050.md](../../docs/md050.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when strong markers (`**` or `__`) are used in an inconsistent way.
/// Which marker counts as the expected one is taken from the `style` field of the rule's
/// [`MD050Config`].
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Rule configuration; its `style` field selects the strong marker to enforce.
    config: MD050Config,
}
28
29impl MD050StrongStyle {
30    pub fn new(style: StrongStyle) -> Self {
31        Self {
32            config: MD050Config { style },
33        }
34    }
35
36    pub fn from_config_struct(config: MD050Config) -> Self {
37        Self { config }
38    }
39
40    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
41    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
42        // Check inline and reference links
43        for link in &ctx.links {
44            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
45                return true;
46            }
47        }
48
49        // Check images (which use similar syntax)
50        for image in &ctx.images {
51            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
52                return true;
53            }
54        }
55
56        // Check reference definitions [ref]: url "title" using regex pattern
57        for m in REF_DEF_REGEX.find_iter(ctx.content) {
58            if m.start() <= byte_pos && byte_pos < m.end() {
59                return true;
60            }
61        }
62
63        false
64    }
65
66    /// Check if a byte position is within an HTML tag
67    fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
68        // Check HTML tags
69        for html_tag in ctx.html_tags().iter() {
70            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
71                return true;
72            }
73        }
74        false
75    }
76
77    fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
78        let content = ctx.content;
79
80        // Find the first occurrence of either style that's not in a code block, link, HTML tag, or front matter
81        let mut first_asterisk = None;
82        for m in BOLD_ASTERISK_REGEX.find_iter(content) {
83            // Skip matches in front matter
84            let (line_num, _) = ctx.offset_to_line_col(m.start());
85            let in_front_matter = ctx
86                .line_info(line_num)
87                .map(|info| info.in_front_matter)
88                .unwrap_or(false);
89
90            if !in_front_matter
91                && !ctx.is_in_code_block_or_span(m.start())
92                && !self.is_in_link(ctx, m.start())
93                && !self.is_in_html_tag(ctx, m.start())
94            {
95                first_asterisk = Some(m);
96                break;
97            }
98        }
99
100        let mut first_underscore = None;
101        for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
102            // Skip matches in front matter
103            let (line_num, _) = ctx.offset_to_line_col(m.start());
104            let in_front_matter = ctx
105                .line_info(line_num)
106                .map(|info| info.in_front_matter)
107                .unwrap_or(false);
108
109            if !in_front_matter
110                && !ctx.is_in_code_block_or_span(m.start())
111                && !self.is_in_link(ctx, m.start())
112                && !self.is_in_html_tag(ctx, m.start())
113            {
114                first_underscore = Some(m);
115                break;
116            }
117        }
118
119        match (first_asterisk, first_underscore) {
120            (Some(a), Some(u)) => {
121                // Whichever pattern appears first determines the style
122                if a.start() < u.start() {
123                    Some(StrongStyle::Asterisk)
124                } else {
125                    Some(StrongStyle::Underscore)
126                }
127            }
128            (Some(_), None) => Some(StrongStyle::Asterisk),
129            (None, Some(_)) => Some(StrongStyle::Underscore),
130            (None, None) => None,
131        }
132    }
133
134    fn is_escaped(&self, text: &str, pos: usize) -> bool {
135        if pos == 0 {
136            return false;
137        }
138
139        let mut backslash_count = 0;
140        let mut i = pos;
141        while i > 0 {
142            i -= 1;
143            let c = text.chars().nth(i).unwrap_or(' ');
144            if c != '\\' {
145                break;
146            }
147            backslash_count += 1;
148        }
149        backslash_count % 2 == 1
150    }
151}
152
153impl Rule for MD050StrongStyle {
154    fn name(&self) -> &'static str {
155        "MD050"
156    }
157
158    fn description(&self) -> &'static str {
159        "Strong emphasis style should be consistent"
160    }
161
162    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
163        let content = ctx.content;
164        let _line_index = LineIndex::new(content.to_string());
165
166        let mut warnings = Vec::new();
167
168        let target_style = match self.config.style {
169            StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
170            _ => self.config.style,
171        };
172
173        let strong_regex = match target_style {
174            StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
175            StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
176            StrongStyle::Consistent => {
177                // This case is handled separately in the calling code
178                // but fallback to asterisk style for safety
179                &*BOLD_UNDERSCORE_REGEX
180            }
181        };
182
183        // Track byte position for each line
184        let mut byte_pos = 0;
185
186        for (line_num, line) in content.lines().enumerate() {
187            // Skip if this line is in front matter
188            if let Some(line_info) = ctx.line_info(line_num + 1)
189                && line_info.in_front_matter
190            {
191                byte_pos += line.len() + 1; // +1 for newline
192                continue;
193            }
194
195            for m in strong_regex.find_iter(line) {
196                // Calculate the byte position of this match in the document
197                let match_byte_pos = byte_pos + m.start();
198
199                // Skip if this strong text is inside a code block, code span, link, or HTML tag
200                if ctx.is_in_code_block_or_span(match_byte_pos)
201                    || self.is_in_link(ctx, match_byte_pos)
202                    || self.is_in_html_tag(ctx, match_byte_pos)
203                {
204                    continue;
205                }
206
207                if !self.is_escaped(line, m.start()) {
208                    let text = &line[m.start() + 2..m.end() - 2];
209                    let message = match target_style {
210                        StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
211                        StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
212                        StrongStyle::Consistent => {
213                            // This case is handled separately in the calling code
214                            // but fallback to asterisk style for safety
215                            "Strong emphasis should use ** instead of __"
216                        }
217                    };
218
219                    // Calculate precise character range for the entire strong emphasis
220                    let (start_line, start_col, end_line, end_col) =
221                        calculate_match_range(line_num + 1, line, m.start(), m.len());
222
223                    warnings.push(LintWarning {
224                        rule_name: Some(self.name()),
225                        line: start_line,
226                        column: start_col,
227                        end_line,
228                        end_column: end_col,
229                        message: message.to_string(),
230                        severity: Severity::Warning,
231                        fix: Some(Fix {
232                            range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
233                            replacement: match target_style {
234                                StrongStyle::Asterisk => format!("**{text}**"),
235                                StrongStyle::Underscore => format!("__{text}__"),
236                                StrongStyle::Consistent => {
237                                    // This case is handled separately in the calling code
238                                    // but fallback to asterisk style for safety
239                                    format!("**{text}**")
240                                }
241                            },
242                        }),
243                    });
244                }
245            }
246
247            // Update byte position for next line
248            byte_pos += line.len() + 1; // +1 for newline
249        }
250
251        Ok(warnings)
252    }
253
254    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
255        let content = ctx.content;
256
257        let target_style = match self.config.style {
258            StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
259            _ => self.config.style,
260        };
261
262        let strong_regex = match target_style {
263            StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
264            StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
265            StrongStyle::Consistent => {
266                // This case is handled separately in the calling code
267                // but fallback to asterisk style for safety
268                &*BOLD_UNDERSCORE_REGEX
269            }
270        };
271
272        // Store matches with their positions
273
274        let matches: Vec<(usize, usize)> = strong_regex
275            .find_iter(content)
276            .filter(|m| {
277                // Skip matches in front matter
278                let (line_num, _) = ctx.offset_to_line_col(m.start());
279                if let Some(line_info) = ctx.line_info(line_num)
280                    && line_info.in_front_matter
281                {
282                    return false;
283                }
284                !ctx.is_in_code_block_or_span(m.start())
285                    && !self.is_in_link(ctx, m.start())
286                    && !self.is_in_html_tag(ctx, m.start())
287            })
288            .filter(|m| !self.is_escaped(content, m.start()))
289            .map(|m| (m.start(), m.end()))
290            .collect();
291
292        // Process matches in reverse order to maintain correct indices
293
294        let mut result = content.to_string();
295        for (start, end) in matches.into_iter().rev() {
296            let text = &result[start + 2..end - 2];
297            let replacement = match target_style {
298                StrongStyle::Asterisk => format!("**{text}**"),
299                StrongStyle::Underscore => format!("__{text}__"),
300                StrongStyle::Consistent => {
301                    // This case is handled separately in the calling code
302                    // but fallback to asterisk style for safety
303                    format!("**{text}**")
304                }
305            };
306            result.replace_range(start..end, &replacement);
307        }
308
309        Ok(result)
310    }
311
312    /// Check if this rule should be skipped
313    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
314        ctx.content.is_empty() || (!ctx.content.contains("**") && !ctx.content.contains("__"))
315    }
316
317    fn as_any(&self) -> &dyn std::any::Any {
318        self
319    }
320
321    fn default_config_section(&self) -> Option<(String, toml::Value)> {
322        let json_value = serde_json::to_value(&self.config).ok()?;
323        Some((
324            self.name().to_string(),
325            crate::rule_config_serde::json_to_toml_value(&json_value)?,
326        ))
327    }
328
329    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
330    where
331        Self: Sized,
332    {
333        let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
334        Box::new(Self::from_config_struct(rule_config))
335    }
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Message emitted when `__` is found but `**` is expected.
    const USE_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    /// Message emitted when `**` is found but `__` is expected.
    const USE_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Lints `content` (standard flavor) with a rule enforcing `style`.
    fn check_with(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).check(&ctx).unwrap()
    }

    /// Applies the rule's fixer for `style` to `content` (standard flavor).
    fn fix_with(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(style).fix(&ctx).unwrap()
    }

    /// Runs style detection on `content` with a `Consistent` rule.
    fn detected_style(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        MD050StrongStyle::new(StrongStyle::Consistent).detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let warnings = check_with(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let warnings = check_with(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let warnings = check_with(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let warnings = check_with(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let warnings = check_with(StrongStyle::Consistent, "First **strong** then __also strong__.");

        // The document opens with **, so the later __ is the inconsistent one
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let warnings = check_with(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        // The document opens with __, so the later ** is the inconsistent one
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_UNDERSCORES));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(
            detected_style("This has **strong** text."),
            Some(StrongStyle::Asterisk)
        );
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(
            detected_style("This has __strong__ text."),
            Some(StrongStyle::Underscore)
        );
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detected_style("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let warnings = check_with(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        // The fenced occurrence is ignored; only line 4 is reported
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let warnings = check_with(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        // The occurrence inside the code span is ignored
        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let warnings = check_with(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        // The backslash-escaped markers are ignored; only the plain span is reported
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = fix_with(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = fix_with(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = fix_with(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = fix_with(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let warnings = check_with(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let warnings = check_with(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(warnings.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let warnings = check_with(StrongStyle::Asterisk, "");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = check_with(StrongStyle::Asterisk, content);

        // Every __ here sits inside a link, a code span, or a reference definition
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let warnings = check_with(StrongStyle::Asterisk, content);

        // The link-related __ occurrences are ignored; only the genuine span is reported
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
        // The report must point at the "real strong text" line
        assert!(warnings[0].line > 4);
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let warnings = check_with(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        // Front matter is ignored; only the body text on line 6 is reported
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 6);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let warnings = check_with(StrongStyle::Asterisk, content);

        // Underscores inside the HTML tag are ignored; only line 7 is reported
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 7);
        assert!(warnings[0].message.contains(USE_ASTERISKS));
    }
}