rumdl_lib/rules/
md036_no_emphasis_only_first.rs

1//!
2//! Rule MD036: No emphasis used as a heading
3//!
4//! See [docs/md036.md](../../docs/md036.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_emphasis_range;
8use regex::Regex;
9use std::sync::LazyLock;
10use toml;
11
12mod md036_config;
13use md036_config::MD036Config;
14
15// Optimize regex patterns with compilation once at startup
16// Note: The content between emphasis markers should not contain other emphasis markers
17// to avoid matching nested emphasis like _**text**_ or **_text_**
18static RE_ASTERISK_SINGLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*([^*_\n]+)\*\s*$").unwrap());
19static RE_UNDERSCORE_SINGLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*_([^*_\n]+)_\s*$").unwrap());
20static RE_ASTERISK_DOUBLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\*([^*_\n]+)\*\*\s*$").unwrap());
21static RE_UNDERSCORE_DOUBLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*__([^*_\n]+)__\s*$").unwrap());
22static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*(?:[*+-]|\d+\.)\s+").unwrap());
23static BLOCKQUOTE_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*>").unwrap());
24static HEADING_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#+\s").unwrap());
25static HEADING_WITH_EMPHASIS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(#+\s+).*(?:\*\*|\*|__|_)").unwrap());
26// Pattern to match common Table of Contents labels that should not be converted to headings
27static TOC_LABEL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
28    Regex::new(r"^\s*(?:\*\*|\*|__|_)(?:Table of Contents|Contents|TOC|Index)(?:\*\*|\*|__|_)\s*$").unwrap()
29});
30
/// Rule MD036: Emphasis used instead of a heading
///
/// Holds the rule's configuration; `Default` builds it from `MD036Config::default()`.
#[derive(Clone, Default)]
pub struct MD036NoEmphasisAsHeading {
    // Settings for this rule instance (see `MD036Config`).
    config: MD036Config,
}
36
37impl MD036NoEmphasisAsHeading {
38    pub fn new(punctuation: String) -> Self {
39        Self {
40            config: MD036Config { punctuation },
41        }
42    }
43
44    pub fn from_config_struct(config: MD036Config) -> Self {
45        Self { config }
46    }
47
48    fn ends_with_punctuation(&self, text: &str) -> bool {
49        if text.is_empty() {
50            return false;
51        }
52        let trimmed = text.trim();
53        if trimmed.is_empty() {
54            return false;
55        }
56        // Check if the last character is in the punctuation set
57        trimmed
58            .chars()
59            .last()
60            .is_some_and(|ch| self.config.punctuation.contains(ch))
61    }
62
63    fn contains_link_or_code(&self, text: &str) -> bool {
64        // Check for inline code: `code`
65        // This is simple but effective since we're checking text that's already
66        // been identified as emphasized content
67        if text.contains('`') {
68            return true;
69        }
70
71        // Check for markdown links: [text](url) or [text][ref]
72        // We need both [ and ] for it to be a potential link
73        // and either ( ) for inline links or ][ for reference links
74        if text.contains('[') && text.contains(']') {
75            // Check for inline link pattern [...](...)
76            if text.contains("](") {
77                return true;
78            }
79            // Check for reference link pattern [...][...] or [...][]
80            if text.contains("][") || text.ends_with(']') {
81                return true;
82            }
83        }
84
85        false
86    }
87
88    fn is_entire_line_emphasized(
89        &self,
90        line: &str,
91        ctx: &crate::lint_context::LintContext,
92        line_num: usize,
93    ) -> Option<(usize, String, usize, usize)> {
94        let original_line = line;
95        let line = line.trim();
96
97        // Fast path for empty lines and lines that don't contain emphasis markers
98        if line.is_empty() || (!line.contains('*') && !line.contains('_')) {
99            return None;
100        }
101
102        // Skip if line is already a heading (but not a heading with emphasis)
103        if HEADING_MARKER.is_match(line) && !HEADING_WITH_EMPHASIS.is_match(line) {
104            return None;
105        }
106
107        // Skip if line is a Table of Contents label (common legitimate use of bold text)
108        if TOC_LABEL_PATTERN.is_match(line) {
109            return None;
110        }
111
112        // Skip if line is in a list, blockquote, code block, or HTML comment
113        if LIST_MARKER.is_match(line)
114            || BLOCKQUOTE_MARKER.is_match(line)
115            || ctx
116                .line_info(line_num + 1)
117                .is_some_and(|info| info.in_code_block || info.in_html_comment)
118        {
119            return None;
120        }
121
122        // Helper closure to check common conditions for all emphasis patterns
123        let check_emphasis = |text: &str, level: usize, pattern: String| -> Option<(usize, String, usize, usize)> {
124            // Check if text ends with punctuation - if so, don't flag it
125            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
126                return None;
127            }
128            // Skip if text contains links or inline code (matches markdownlint behavior)
129            // In markdownlint, these would be multiple tokens and thus not flagged
130            if self.contains_link_or_code(text) {
131                return None;
132            }
133            // Find position in original line by looking for the emphasis pattern
134            let start_pos = original_line.find(&pattern).unwrap_or(0);
135            let end_pos = start_pos + pattern.len();
136            Some((level, text.to_string(), start_pos, end_pos))
137        };
138
139        // Check for *emphasis* pattern (entire line)
140        if let Some(caps) = RE_ASTERISK_SINGLE.captures(line) {
141            let text = caps.get(1).unwrap().as_str();
142            let pattern = format!("*{text}*");
143            return check_emphasis(text, 1, pattern);
144        }
145
146        // Check for _emphasis_ pattern (entire line)
147        if let Some(caps) = RE_UNDERSCORE_SINGLE.captures(line) {
148            let text = caps.get(1).unwrap().as_str();
149            let pattern = format!("_{text}_");
150            return check_emphasis(text, 1, pattern);
151        }
152
153        // Check for **strong** pattern (entire line)
154        if let Some(caps) = RE_ASTERISK_DOUBLE.captures(line) {
155            let text = caps.get(1).unwrap().as_str();
156            let pattern = format!("**{text}**");
157            return check_emphasis(text, 2, pattern);
158        }
159
160        // Check for __strong__ pattern (entire line)
161        if let Some(caps) = RE_UNDERSCORE_DOUBLE.captures(line) {
162            let text = caps.get(1).unwrap().as_str();
163            let pattern = format!("__{text}__");
164            return check_emphasis(text, 2, pattern);
165        }
166
167        None
168    }
169}
170
171impl Rule for MD036NoEmphasisAsHeading {
172    fn name(&self) -> &'static str {
173        "MD036"
174    }
175
176    fn description(&self) -> &'static str {
177        "Emphasis should not be used instead of a heading"
178    }
179
180    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
181        let content = ctx.content;
182        // Fast path for empty content or content without emphasis markers
183        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
184            return Ok(Vec::new());
185        }
186
187        let mut warnings = Vec::new();
188
189        for (i, line) in content.lines().enumerate() {
190            // Skip obvious non-matches quickly
191            if line.trim().is_empty() || (!line.contains('*') && !line.contains('_')) {
192                continue;
193            }
194
195            if let Some((_level, text, start_pos, end_pos)) = self.is_entire_line_emphasized(line, ctx, i) {
196                let (start_line, start_col, end_line, end_col) =
197                    calculate_emphasis_range(i + 1, line, start_pos, end_pos);
198
199                warnings.push(LintWarning {
200                    rule_name: Some(self.name().to_string()),
201                    line: start_line,
202                    column: start_col,
203                    end_line,
204                    end_column: end_col,
205                    message: format!("Emphasis used instead of a heading: '{text}'"),
206                    severity: Severity::Warning,
207                    fix: None, // No automatic fix - too risky to convert to heading
208                });
209            }
210        }
211
212        Ok(warnings)
213    }
214
215    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
216        // MD036 does not provide automatic fixes
217        // Converting bold text to headings is too risky and can corrupt documents
218        // Users should manually decide if bold text should be a heading
219        Ok(ctx.content.to_string())
220    }
221
222    /// Check if this rule should be skipped for performance
223    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
224        // Skip if content is empty or has no emphasis markers
225        ctx.content.is_empty() || !ctx.likely_has_emphasis()
226    }
227
228    fn as_any(&self) -> &dyn std::any::Any {
229        self
230    }
231
232    fn default_config_section(&self) -> Option<(String, toml::Value)> {
233        let mut map = toml::map::Map::new();
234        map.insert(
235            "punctuation".to_string(),
236            toml::Value::String(self.config.punctuation.clone()),
237        );
238        Some((self.name().to_string(), toml::Value::Table(map)))
239    }
240
241    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
242    where
243        Self: Sized,
244    {
245        let punctuation = crate::config::get_rule_config_value::<String>(config, "MD036", "punctuation")
246            .unwrap_or_else(|| ".,;:!?".to_string());
247
248        Box::new(MD036NoEmphasisAsHeading::new(punctuation))
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Punctuation set used by most tests.
    const DEFAULT_PUNCTUATION: &str = ".,;:!?";

    /// Rule configured with the given punctuation characters.
    fn rule_with(punctuation: &str) -> MD036NoEmphasisAsHeading {
        MD036NoEmphasisAsHeading::new(punctuation.to_string())
    }

    /// Rule configured with `DEFAULT_PUNCTUATION`.
    fn default_rule() -> MD036NoEmphasisAsHeading {
        rule_with(DEFAULT_PUNCTUATION)
    }

    /// Runs `check` on `content` (standard flavor) and returns the warnings.
    fn lint(rule: &MD036NoEmphasisAsHeading, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` (standard flavor) and returns the resulting text.
    fn apply_fix(rule: &MD036NoEmphasisAsHeading, content: &str) -> String {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_single_asterisk_emphasis() {
        let warnings = lint(&default_rule(), "*This is emphasized*\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 1);
        assert!(
            first
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_single_underscore_emphasis() {
        let warnings = lint(&default_rule(), "_This is emphasized_\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 1);
        assert!(
            first
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_double_asterisk_strong() {
        let warnings = lint(&default_rule(), "**This is strong**\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 1);
        assert!(
            first
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_double_underscore_strong() {
        let warnings = lint(&default_rule(), "__This is strong__\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        let first = &warnings[0];
        assert_eq!(first.line, 1);
        assert!(
            first
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_emphasis_with_punctuation() {
        // Text ending in configured punctuation is exempt (matches markdownlint).
        let warnings = lint(&default_rule(), "**Important Note:**\n\nRegular text");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_paragraph() {
        // Emphasis in the middle of a line is not a heading candidate.
        let warnings = lint(&default_rule(), "This is a paragraph with *emphasis* in the middle.");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_list() {
        // List items are never reported.
        let warnings = lint(&default_rule(), "- *List item with emphasis*\n- Another item");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_blockquote() {
        // Blockquote lines are never reported.
        let warnings = lint(&default_rule(), "> *Quote with emphasis*\n> Another line");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_code_block() {
        // Content of fenced code blocks is never reported.
        let warnings = lint(&default_rule(), "```\n*Not emphasis in code*\n```");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_html_comment() {
        // Content of HTML comments is never reported (matches markdownlint).
        let warnings = lint(&default_rule(), "<!--\n**bigger**\ncomment\n-->");

        assert_eq!(
            warnings.len(),
            0,
            "Expected no warnings for emphasis in HTML comment, got: {warnings:?}"
        );
    }

    #[test]
    fn test_toc_label() {
        // Common Table of Contents labels are exempt.
        let warnings = lint(&default_rule(), "**Table of Contents**\n\n- Item 1\n- Item 2");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_already_heading() {
        // Emphasis inside an existing heading is not reported.
        let warnings = lint(&default_rule(), "# **Bold in heading**\n\nRegular text");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_no_changes() {
        let content = "*Convert to heading*\n\nRegular text";

        // MD036 no longer provides automatic fixes
        assert_eq!(apply_fix(&default_rule(), content), content);
    }

    #[test]
    fn test_fix_preserves_content() {
        let content = "**Convert to heading**\n\nRegular text";

        // MD036 no longer provides automatic fixes
        assert_eq!(apply_fix(&default_rule(), content), content);
    }

    #[test]
    fn test_empty_punctuation_config() {
        let rule = rule_with("");
        let content = "**Important Note:**\n\nRegular text";

        // With empty punctuation config, all emphasis is flagged
        assert_eq!(lint(&rule, content).len(), 1);

        // MD036 no longer provides automatic fixes
        assert_eq!(apply_fix(&rule, content), content);
    }

    #[test]
    fn test_multiple_emphasized_lines() {
        let warnings = lint(
            &default_rule(),
            "*First heading*\n\nSome text\n\n**Second heading**\n\nMore text",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 5);
    }

    #[test]
    fn test_whitespace_handling() {
        let warnings = lint(&default_rule(), "  **Indented emphasis**  \n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn test_nested_emphasis() {
        // Three asterisks (nested emphasis) match none of the patterns.
        let warnings = lint(&default_rule(), "***Not a simple emphasis***\n\nRegular text");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_with_newlines() {
        // Emphasis spanning several lines is not reported.
        let warnings = lint(&default_rule(), "*First line\nSecond line*\n\nRegular text");

        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_preserves_trailing_newline() {
        let content = "*Convert to heading*\n";

        // MD036 no longer provides automatic fixes
        assert_eq!(apply_fix(&default_rule(), content), content);
    }

    #[test]
    fn test_default_config() {
        let (name, config) = default_rule().default_config_section().unwrap();
        assert_eq!(name, "MD036");

        let table = config.as_table().unwrap();
        assert_eq!(table.get("punctuation").unwrap().as_str().unwrap(), ".,;:!?");
    }

    #[test]
    fn test_image_caption_scenario() {
        // Test the specific issue from #23 - bold text used as image caption
        let rule = default_rule();
        let content = "#### Métriques\n\n**commits par année : rumdl**\n\n![rumdl Commits By Year image](commits_by_year.png \"commits par année : rumdl\")";
        let warnings = lint(&rule, content);

        // Should detect the bold text even though it's followed by an image
        assert_eq!(warnings.len(), 1);
        let caption = &warnings[0];
        assert_eq!(caption.line, 3);
        assert!(caption.message.contains("commits par année : rumdl"));

        // But should NOT provide a fix
        assert!(caption.fix.is_none());

        // And the fix method should return unchanged content
        assert_eq!(apply_fix(&rule, content), content);
    }

    #[test]
    fn test_bold_with_colon_no_punctuation_config() {
        // Test that with empty punctuation config, even text ending with colon is flagged
        let warnings = lint(&rule_with(""), "**commits par année : rumdl**\n\nSome text");

        // With empty punctuation config, this should be flagged
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn test_bold_with_colon_default_config() {
        // Test that with default punctuation config, text ending with colon is NOT flagged
        let warnings = lint(&default_rule(), "**Important Note:**\n\nSome text");

        // With default punctuation including colon, this should NOT be flagged
        assert_eq!(warnings.len(), 0);
    }
}