rumdl_lib/rules/
md050_strong_style.rs

1use crate::utils::range_utils::{LineIndex, calculate_match_range};
2use crate::utils::regex_cache::{BOLD_ASTERISK_REGEX, BOLD_UNDERSCORE_REGEX};
3
4use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, Severity};
5use crate::rules::strong_style::StrongStyle;
6use lazy_static::lazy_static;
7use regex::Regex;
8
lazy_static! {
    // Reference definition pattern - matches [ref]: url "title"
    //
    // Pattern pieces, in order:
    //   (?m)            multi-line mode, so `^` and `$` anchor at every line boundary
    //   ^[ ]{0,3}       at most three leading spaces
    //   \[([^\]]+)\]:   a non-empty bracketed label followed by a colon (group 1)
    //   \s*([^\s]+)     the destination: a run of non-whitespace characters (group 2)
    //   (?:\s+(...))?   an optional title in double quotes (group 3) or single quotes (group 4)
    //
    // The pattern is a fixed literal, so the `unwrap()` can only fail if the literal itself is edited
    // into something invalid.
    static ref REF_DEF_REGEX: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();
}
15
16mod md050_config;
17use md050_config::MD050Config;
18
/// Rule MD050: Strong style
///
/// See [docs/md050.md](../../docs/md050.md) for full documentation, configuration, and examples.
///
/// This rule is triggered when strong markers (** or __) are used in an inconsistent way.
#[derive(Debug, Default, Clone)]
pub struct MD050StrongStyle {
    /// Rule configuration; its `style` field selects which strong marker is expected.
    config: MD050Config,
}
28
29impl MD050StrongStyle {
    /// Creates the rule with `style` as its configured strong style.
    pub fn new(style: StrongStyle) -> Self {
        Self {
            config: MD050Config { style },
        }
    }
35
    /// Creates the rule from an already-built [`MD050Config`], stored as-is.
    pub fn from_config_struct(config: MD050Config) -> Self {
        Self { config }
    }
39
40    /// Check if a byte position is within a link (inline links, reference links, or reference definitions)
41    fn is_in_link(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
42        // Check inline and reference links
43        for link in &ctx.links {
44            if link.byte_offset <= byte_pos && byte_pos < link.byte_end {
45                return true;
46            }
47        }
48
49        // Check images (which use similar syntax)
50        for image in &ctx.images {
51            if image.byte_offset <= byte_pos && byte_pos < image.byte_end {
52                return true;
53            }
54        }
55
56        // Check reference definitions [ref]: url "title" using regex pattern
57        for m in REF_DEF_REGEX.find_iter(ctx.content) {
58            if m.start() <= byte_pos && byte_pos < m.end() {
59                return true;
60            }
61        }
62
63        false
64    }
65
66    /// Check if a byte position is within an HTML tag
67    fn is_in_html_tag(&self, ctx: &crate::lint_context::LintContext, byte_pos: usize) -> bool {
68        // Check HTML tags
69        for html_tag in ctx.html_tags().iter() {
70            if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
71                return true;
72            }
73        }
74        false
75    }
76
77    fn detect_style(&self, ctx: &crate::lint_context::LintContext) -> Option<StrongStyle> {
78        let content = ctx.content;
79
80        // Find the first occurrence of either style that's not in a code block, link, HTML tag, or front matter
81        let mut first_asterisk = None;
82        for m in BOLD_ASTERISK_REGEX.find_iter(content) {
83            // Skip matches in front matter
84            let (line_num, _) = ctx.offset_to_line_col(m.start());
85            let in_front_matter = ctx
86                .line_info(line_num)
87                .map(|info| info.in_front_matter)
88                .unwrap_or(false);
89
90            if !in_front_matter
91                && !ctx.is_in_code_block_or_span(m.start())
92                && !self.is_in_link(ctx, m.start())
93                && !self.is_in_html_tag(ctx, m.start())
94            {
95                first_asterisk = Some(m);
96                break;
97            }
98        }
99
100        let mut first_underscore = None;
101        for m in BOLD_UNDERSCORE_REGEX.find_iter(content) {
102            // Skip matches in front matter
103            let (line_num, _) = ctx.offset_to_line_col(m.start());
104            let in_front_matter = ctx
105                .line_info(line_num)
106                .map(|info| info.in_front_matter)
107                .unwrap_or(false);
108
109            if !in_front_matter
110                && !ctx.is_in_code_block_or_span(m.start())
111                && !self.is_in_link(ctx, m.start())
112                && !self.is_in_html_tag(ctx, m.start())
113            {
114                first_underscore = Some(m);
115                break;
116            }
117        }
118
119        match (first_asterisk, first_underscore) {
120            (Some(a), Some(u)) => {
121                // Whichever pattern appears first determines the style
122                if a.start() < u.start() {
123                    Some(StrongStyle::Asterisk)
124                } else {
125                    Some(StrongStyle::Underscore)
126                }
127            }
128            (Some(_), None) => Some(StrongStyle::Asterisk),
129            (None, Some(_)) => Some(StrongStyle::Underscore),
130            (None, None) => None,
131        }
132    }
133
134    fn is_escaped(&self, text: &str, pos: usize) -> bool {
135        if pos == 0 {
136            return false;
137        }
138
139        let mut backslash_count = 0;
140        let mut i = pos;
141        let bytes = text.as_bytes();
142        while i > 0 {
143            i -= 1;
144            // Safe for ASCII backslash
145            if i < bytes.len() && bytes[i] != b'\\' {
146                break;
147            }
148            backslash_count += 1;
149        }
150        backslash_count % 2 == 1
151    }
152}
153
154impl Rule for MD050StrongStyle {
    /// Returns this rule's identifier, `"MD050"`.
    fn name(&self) -> &'static str {
        "MD050"
    }
158
    /// Returns a one-line summary of what this rule enforces.
    fn description(&self) -> &'static str {
        "Strong emphasis style should be consistent"
    }
162
163    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
164        let content = ctx.content;
165        let _line_index = LineIndex::new(content.to_string());
166
167        let mut warnings = Vec::new();
168
169        let target_style = match self.config.style {
170            StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
171            _ => self.config.style,
172        };
173
174        let strong_regex = match target_style {
175            StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
176            StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
177            StrongStyle::Consistent => {
178                // This case is handled separately in the calling code
179                // but fallback to asterisk style for safety
180                &*BOLD_UNDERSCORE_REGEX
181            }
182        };
183
184        // Track byte position for each line
185        let mut byte_pos = 0;
186
187        for (line_num, line) in content.lines().enumerate() {
188            // Skip if this line is in front matter
189            if let Some(line_info) = ctx.line_info(line_num + 1)
190                && line_info.in_front_matter
191            {
192                byte_pos += line.len() + 1; // +1 for newline
193                continue;
194            }
195
196            for m in strong_regex.find_iter(line) {
197                // Calculate the byte position of this match in the document
198                let match_byte_pos = byte_pos + m.start();
199
200                // Skip if this strong text is inside a code block, code span, link, or HTML tag
201                if ctx.is_in_code_block_or_span(match_byte_pos)
202                    || self.is_in_link(ctx, match_byte_pos)
203                    || self.is_in_html_tag(ctx, match_byte_pos)
204                {
205                    continue;
206                }
207
208                if !self.is_escaped(line, m.start()) {
209                    let text = &line[m.start() + 2..m.end() - 2];
210                    let message = match target_style {
211                        StrongStyle::Asterisk => "Strong emphasis should use ** instead of __",
212                        StrongStyle::Underscore => "Strong emphasis should use __ instead of **",
213                        StrongStyle::Consistent => {
214                            // This case is handled separately in the calling code
215                            // but fallback to asterisk style for safety
216                            "Strong emphasis should use ** instead of __"
217                        }
218                    };
219
220                    // Calculate precise character range for the entire strong emphasis
221                    let (start_line, start_col, end_line, end_col) =
222                        calculate_match_range(line_num + 1, line, m.start(), m.len());
223
224                    warnings.push(LintWarning {
225                        rule_name: Some(self.name()),
226                        line: start_line,
227                        column: start_col,
228                        end_line,
229                        end_column: end_col,
230                        message: message.to_string(),
231                        severity: Severity::Warning,
232                        fix: Some(Fix {
233                            range: _line_index.line_col_to_byte_range(line_num + 1, m.start() + 1),
234                            replacement: match target_style {
235                                StrongStyle::Asterisk => format!("**{text}**"),
236                                StrongStyle::Underscore => format!("__{text}__"),
237                                StrongStyle::Consistent => {
238                                    // This case is handled separately in the calling code
239                                    // but fallback to asterisk style for safety
240                                    format!("**{text}**")
241                                }
242                            },
243                        }),
244                    });
245                }
246            }
247
248            // Update byte position for next line
249            byte_pos += line.len() + 1; // +1 for newline
250        }
251
252        Ok(warnings)
253    }
254
255    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
256        let content = ctx.content;
257
258        let target_style = match self.config.style {
259            StrongStyle::Consistent => self.detect_style(ctx).unwrap_or(StrongStyle::Asterisk),
260            _ => self.config.style,
261        };
262
263        let strong_regex = match target_style {
264            StrongStyle::Asterisk => &*BOLD_UNDERSCORE_REGEX,
265            StrongStyle::Underscore => &*BOLD_ASTERISK_REGEX,
266            StrongStyle::Consistent => {
267                // This case is handled separately in the calling code
268                // but fallback to asterisk style for safety
269                &*BOLD_UNDERSCORE_REGEX
270            }
271        };
272
273        // Store matches with their positions
274
275        let matches: Vec<(usize, usize)> = strong_regex
276            .find_iter(content)
277            .filter(|m| {
278                // Skip matches in front matter
279                let (line_num, _) = ctx.offset_to_line_col(m.start());
280                if let Some(line_info) = ctx.line_info(line_num)
281                    && line_info.in_front_matter
282                {
283                    return false;
284                }
285                !ctx.is_in_code_block_or_span(m.start())
286                    && !self.is_in_link(ctx, m.start())
287                    && !self.is_in_html_tag(ctx, m.start())
288            })
289            .filter(|m| !self.is_escaped(content, m.start()))
290            .map(|m| (m.start(), m.end()))
291            .collect();
292
293        // Process matches in reverse order to maintain correct indices
294
295        let mut result = content.to_string();
296        for (start, end) in matches.into_iter().rev() {
297            let text = &result[start + 2..end - 2];
298            let replacement = match target_style {
299                StrongStyle::Asterisk => format!("**{text}**"),
300                StrongStyle::Underscore => format!("__{text}__"),
301                StrongStyle::Consistent => {
302                    // This case is handled separately in the calling code
303                    // but fallback to asterisk style for safety
304                    format!("**{text}**")
305                }
306            };
307            result.replace_range(start..end, &replacement);
308        }
309
310        Ok(result)
311    }
312
313    /// Check if this rule should be skipped
314    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
315        ctx.content.is_empty() || (!ctx.content.contains("**") && !ctx.content.contains("__"))
316    }
317
    /// Exposes this rule as a `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
321
322    fn default_config_section(&self) -> Option<(String, toml::Value)> {
323        let json_value = serde_json::to_value(&self.config).ok()?;
324        Some((
325            self.name().to_string(),
326            crate::rule_config_serde::json_to_toml_value(&json_value)?,
327        ))
328    }
329
330    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
331    where
332        Self: Sized,
333    {
334        let rule_config = crate::rule_config_serde::load_rule_config::<MD050Config>(config);
335        Box::new(Self::from_config_struct(rule_config))
336    }
337}
338
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Warning text emitted when asterisks are the target style.
    const WANT_ASTERISKS: &str = "Strong emphasis should use ** instead of __";
    /// Warning text emitted when underscores are the target style.
    const WANT_UNDERSCORES: &str = "Strong emphasis should use __ instead of **";

    /// Runs `check` for a rule configured with `style` over `content` (standard flavor).
    fn lint(style: StrongStyle, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(style);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` for a rule configured with `style` over `content` (standard flavor).
    fn repair(style: StrongStyle, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(style);
        rule.fix(&ctx).unwrap()
    }

    /// Runs `detect_style` on `content` using a rule in consistent mode.
    fn detected(content: &str) -> Option<StrongStyle> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        rule.detect_style(&ctx)
    }

    #[test]
    fn test_asterisk_style_with_asterisks() {
        let found = lint(StrongStyle::Asterisk, "This is **strong text** here.");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_asterisk_style_with_underscores() {
        let found = lint(StrongStyle::Asterisk, "This is __strong text__ here.");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_ASTERISKS));
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].column, 9);
    }

    #[test]
    fn test_underscore_style_with_underscores() {
        let found = lint(StrongStyle::Underscore, "This is __strong text__ here.");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_underscore_style_with_asterisks() {
        let found = lint(StrongStyle::Underscore, "This is **strong text** here.");

        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_UNDERSCORES));
    }

    #[test]
    fn test_consistent_style_first_asterisk() {
        let found = lint(StrongStyle::Consistent, "First **strong** then __also strong__.");

        // First strong is **, so __ should be flagged
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_ASTERISKS));
    }

    #[test]
    fn test_consistent_style_first_underscore() {
        let found = lint(StrongStyle::Consistent, "First __strong__ then **also strong**.");

        // First strong is __, so ** should be flagged
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_UNDERSCORES));
    }

    #[test]
    fn test_detect_style_asterisk() {
        assert_eq!(detected("This has **strong** text."), Some(StrongStyle::Asterisk));
    }

    #[test]
    fn test_detect_style_underscore() {
        assert_eq!(detected("This has __strong__ text."), Some(StrongStyle::Underscore));
    }

    #[test]
    fn test_detect_style_none() {
        assert_eq!(detected("No strong text here."), None);
    }

    #[test]
    fn test_strong_in_code_block() {
        let found = lint(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        // Only the strong outside code block should be flagged
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 4);
    }

    #[test]
    fn test_strong_in_inline_code() {
        let found = lint(
            StrongStyle::Asterisk,
            "Text with `__strong__` in code and __strong__ outside.",
        );

        // Only the strong outside inline code should be flagged
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_escaped_strong() {
        let found = lint(
            StrongStyle::Asterisk,
            "This is \\__not strong\\__ but __this is__.",
        );

        // Only the unescaped strong should be flagged
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[0].column, 30);
    }

    #[test]
    fn test_fix_asterisks_to_underscores() {
        let fixed = repair(StrongStyle::Underscore, "This is **strong** text.");

        assert_eq!(fixed, "This is __strong__ text.");
    }

    #[test]
    fn test_fix_underscores_to_asterisks() {
        let fixed = repair(StrongStyle::Asterisk, "This is __strong__ text.");

        assert_eq!(fixed, "This is **strong** text.");
    }

    #[test]
    fn test_fix_multiple_strong() {
        let fixed = repair(
            StrongStyle::Asterisk,
            "First __strong__ and second __also strong__.",
        );

        assert_eq!(fixed, "First **strong** and second **also strong**.");
    }

    #[test]
    fn test_fix_preserves_code_blocks() {
        let fixed = repair(
            StrongStyle::Asterisk,
            "```\n__strong__ in code\n```\n__strong__ outside",
        );

        assert_eq!(fixed, "```\n__strong__ in code\n```\n**strong** outside");
    }

    #[test]
    fn test_multiline_content() {
        let found = lint(
            StrongStyle::Asterisk,
            "Line 1 with __strong__\nLine 2 with __another__\nLine 3 normal",
        );

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].line, 1);
        assert_eq!(found[1].line, 2);
    }

    #[test]
    fn test_nested_emphasis() {
        let found = lint(
            StrongStyle::Asterisk,
            "This has __strong with *emphasis* inside__.",
        );

        assert_eq!(found.len(), 1);
    }

    #[test]
    fn test_empty_content() {
        let found = lint(StrongStyle::Asterisk, "");

        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_default_config() {
        let rule = MD050StrongStyle::new(StrongStyle::Consistent);
        let (name, _config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD050");
    }

    #[test]
    fn test_strong_in_links_not_flagged() {
        let content = r#"Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

Hint:

- [An article on something](https://blog.yuo.be/2018/08/16/__init_subclass__-a-simpler-way-to-implement-class-registries-in-python/ "Some details on using `__init_subclass__`")


[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let found = lint(StrongStyle::Asterisk, content);

        // None of the __ patterns in links should be flagged
        assert_eq!(found.len(), 0);
    }

    #[test]
    fn test_strong_in_links_vs_outside_links() {
        let content = r#"We're doing this because generator functions return a generator object which [is an iterator][generators are iterators] and **we need `__iter__` to return an [iterator][]**.

Instead of assigning to `self.value`, we're relying on the [`__dict__`][__dict__] in our object to hold that value instead.

This is __real strong text__ that should be flagged.

[__dict__]: https://www.pythonmorsels.com/where-are-attributes-stored/"#;
        let found = lint(StrongStyle::Asterisk, content);

        // Only the real strong text should be flagged, not the __ in links
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains(WANT_ASTERISKS));
        // The flagged text should be "real strong text"
        assert!(found[0].line > 4); // Should be on the line with "real strong text"
    }

    #[test]
    fn test_front_matter_not_flagged() {
        let found = lint(
            StrongStyle::Asterisk,
            "---\ntitle: What's __init__.py?\nother: __value__\n---\n\nThis __should be flagged__.",
        );

        // Only the strong text outside front matter should be flagged
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 6);
        assert!(found[0].message.contains(WANT_ASTERISKS));
    }

    #[test]
    fn test_html_tags_not_flagged() {
        let content = r#"# Test

This has HTML with underscores:

<iframe src="https://example.com/__init__/__repr__"> </iframe>

This __should be flagged__ as inconsistent."#;
        let found = lint(StrongStyle::Asterisk, content);

        // Only the strong text outside HTML tags should be flagged
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].line, 7);
        assert!(found[0].message.contains(WANT_ASTERISKS));
    }
}