rumdl_lib/rules/
md036_no_emphasis_only_first.rs

1//!
2//! Rule MD036: No emphasis used as a heading
3//!
4//! See [docs/md036.md](../../docs/md036.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_emphasis_range;
8use lazy_static::lazy_static;
9use regex::Regex;
10use toml;
11
12mod md036_config;
13use md036_config::MD036Config;
14
15lazy_static! {
16    // Optimize regex patterns with compilation once at startup
17    // Note: The content between emphasis markers should not contain other emphasis markers
18    // to avoid matching nested emphasis like _**text**_ or **_text_**
19    static ref RE_ASTERISK_SINGLE: Regex = Regex::new(r"^\s*\*([^*_\n]+)\*\s*$").unwrap();
20    static ref RE_UNDERSCORE_SINGLE: Regex = Regex::new(r"^\s*_([^*_\n]+)_\s*$").unwrap();
21    static ref RE_ASTERISK_DOUBLE: Regex = Regex::new(r"^\s*\*\*([^*_\n]+)\*\*\s*$").unwrap();
22    static ref RE_UNDERSCORE_DOUBLE: Regex = Regex::new(r"^\s*__([^*_\n]+)__\s*$").unwrap();
23    static ref LIST_MARKER: Regex = Regex::new(r"^\s*(?:[*+-]|\d+\.)\s+").unwrap();
24    static ref BLOCKQUOTE_MARKER: Regex = Regex::new(r"^\s*>").unwrap();
25    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
26    static ref HEADING_MARKER: Regex = Regex::new(r"^#+\s").unwrap();
27    static ref HEADING_WITH_EMPHASIS: Regex = Regex::new(r"^(#+\s+).*(?:\*\*|\*|__|_)").unwrap();
28    // Pattern to match common Table of Contents labels that should not be converted to headings
29    static ref TOC_LABEL_PATTERN: Regex = Regex::new(r"^\s*(?:\*\*|\*|__|_)(?:Table of Contents|Contents|TOC|Index)(?:\*\*|\*|__|_)\s*$").unwrap();
30}
31
32/// Rule MD036: Emphasis used instead of a heading
33#[derive(Clone, Default)]
34pub struct MD036NoEmphasisAsHeading {
35    config: MD036Config,
36}
37
38impl MD036NoEmphasisAsHeading {
39    pub fn new(punctuation: String) -> Self {
40        Self {
41            config: MD036Config { punctuation },
42        }
43    }
44
45    pub fn from_config_struct(config: MD036Config) -> Self {
46        Self { config }
47    }
48
49    fn ends_with_punctuation(&self, text: &str) -> bool {
50        if text.is_empty() {
51            return false;
52        }
53        let trimmed = text.trim();
54        if trimmed.is_empty() {
55            return false;
56        }
57        // Check if the last character is in the punctuation set
58        trimmed
59            .chars()
60            .last()
61            .is_some_and(|ch| self.config.punctuation.contains(ch))
62    }
63
64    fn contains_link_or_code(&self, text: &str) -> bool {
65        // Check for inline code: `code`
66        // This is simple but effective since we're checking text that's already
67        // been identified as emphasized content
68        if text.contains('`') {
69            return true;
70        }
71
72        // Check for markdown links: [text](url) or [text][ref]
73        // We need both [ and ] for it to be a potential link
74        // and either ( ) for inline links or ][ for reference links
75        if text.contains('[') && text.contains(']') {
76            // Check for inline link pattern [...](...)
77            if text.contains("](") {
78                return true;
79            }
80            // Check for reference link pattern [...][...] or [...][]
81            if text.contains("][") || text.ends_with(']') {
82                return true;
83            }
84        }
85
86        false
87    }
88
89    fn is_entire_line_emphasized(
90        &self,
91        line: &str,
92        ctx: &crate::lint_context::LintContext,
93        line_num: usize,
94    ) -> Option<(usize, String, usize, usize)> {
95        let original_line = line;
96        let line = line.trim();
97
98        // Fast path for empty lines and lines that don't contain emphasis markers
99        if line.is_empty() || (!line.contains('*') && !line.contains('_')) {
100            return None;
101        }
102
103        // Skip if line is already a heading (but not a heading with emphasis)
104        if HEADING_MARKER.is_match(line) && !HEADING_WITH_EMPHASIS.is_match(line) {
105            return None;
106        }
107
108        // Skip if line is a Table of Contents label (common legitimate use of bold text)
109        if TOC_LABEL_PATTERN.is_match(line) {
110            return None;
111        }
112
113        // Skip if line is in a list, blockquote, or code block
114        if LIST_MARKER.is_match(line)
115            || BLOCKQUOTE_MARKER.is_match(line)
116            || ctx.line_info(line_num + 1).is_some_and(|info| info.in_code_block)
117        // line_num is 0-based, but LintContext expects 1-based
118        {
119            return None;
120        }
121
122        // Helper closure to check common conditions for all emphasis patterns
123        let check_emphasis = |text: &str, level: usize, pattern: String| -> Option<(usize, String, usize, usize)> {
124            // Check if text ends with punctuation - if so, don't flag it
125            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
126                return None;
127            }
128            // Skip if text contains links or inline code (matches markdownlint behavior)
129            // In markdownlint, these would be multiple tokens and thus not flagged
130            if self.contains_link_or_code(text) {
131                return None;
132            }
133            // Find position in original line by looking for the emphasis pattern
134            let start_pos = original_line.find(&pattern).unwrap_or(0);
135            let end_pos = start_pos + pattern.len();
136            Some((level, text.to_string(), start_pos, end_pos))
137        };
138
139        // Check for *emphasis* pattern (entire line)
140        if let Some(caps) = RE_ASTERISK_SINGLE.captures(line) {
141            let text = caps.get(1).unwrap().as_str();
142            let pattern = format!("*{text}*");
143            return check_emphasis(text, 1, pattern);
144        }
145
146        // Check for _emphasis_ pattern (entire line)
147        if let Some(caps) = RE_UNDERSCORE_SINGLE.captures(line) {
148            let text = caps.get(1).unwrap().as_str();
149            let pattern = format!("_{text}_");
150            return check_emphasis(text, 1, pattern);
151        }
152
153        // Check for **strong** pattern (entire line)
154        if let Some(caps) = RE_ASTERISK_DOUBLE.captures(line) {
155            let text = caps.get(1).unwrap().as_str();
156            let pattern = format!("**{text}**");
157            return check_emphasis(text, 2, pattern);
158        }
159
160        // Check for __strong__ pattern (entire line)
161        if let Some(caps) = RE_UNDERSCORE_DOUBLE.captures(line) {
162            let text = caps.get(1).unwrap().as_str();
163            let pattern = format!("__{text}__");
164            return check_emphasis(text, 2, pattern);
165        }
166
167        None
168    }
169}
170
171impl Rule for MD036NoEmphasisAsHeading {
172    fn name(&self) -> &'static str {
173        "MD036"
174    }
175
176    fn description(&self) -> &'static str {
177        "Emphasis should not be used instead of a heading"
178    }
179
180    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
181        let content = ctx.content;
182        // Fast path for empty content or content without emphasis markers
183        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
184            return Ok(Vec::new());
185        }
186
187        let mut warnings = Vec::new();
188
189        for (i, line) in content.lines().enumerate() {
190            // Skip obvious non-matches quickly
191            if line.trim().is_empty() || (!line.contains('*') && !line.contains('_')) {
192                continue;
193            }
194
195            if let Some((_level, text, start_pos, end_pos)) = self.is_entire_line_emphasized(line, ctx, i) {
196                let (start_line, start_col, end_line, end_col) =
197                    calculate_emphasis_range(i + 1, line, start_pos, end_pos);
198
199                warnings.push(LintWarning {
200                    rule_name: Some(self.name()),
201                    line: start_line,
202                    column: start_col,
203                    end_line,
204                    end_column: end_col,
205                    message: format!("Emphasis used instead of a heading: '{text}'"),
206                    severity: Severity::Warning,
207                    fix: None, // No automatic fix - too risky to convert to heading
208                });
209            }
210        }
211
212        Ok(warnings)
213    }
214
215    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
216        // MD036 does not provide automatic fixes
217        // Converting bold text to headings is too risky and can corrupt documents
218        // Users should manually decide if bold text should be a heading
219        Ok(ctx.content.to_string())
220    }
221
222    /// Check if this rule should be skipped for performance
223    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
224        // Skip if content is empty or has no emphasis markers
225        ctx.content.is_empty() || !ctx.likely_has_emphasis()
226    }
227
228    fn as_any(&self) -> &dyn std::any::Any {
229        self
230    }
231
232    fn default_config_section(&self) -> Option<(String, toml::Value)> {
233        let mut map = toml::map::Map::new();
234        map.insert(
235            "punctuation".to_string(),
236            toml::Value::String(self.config.punctuation.clone()),
237        );
238        Some((self.name().to_string(), toml::Value::Table(map)))
239    }
240
241    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
242    where
243        Self: Sized,
244    {
245        let punctuation = crate::config::get_rule_config_value::<String>(config, "MD036", "punctuation")
246            .unwrap_or_else(|| ".,;:!?".to_string());
247
248        Box::new(MD036NoEmphasisAsHeading::new(punctuation))
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Punctuation set used by most tests.
    const DEFAULT_PUNCTUATION: &str = ".,;:!?";

    /// Runs `check` on `content` with a rule built from `punctuation`.
    fn check_with(punctuation: &str, content: &str) -> Vec<crate::rule::LintWarning> {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` with a rule built from `punctuation`.
    fn fix_with(punctuation: &str, content: &str) -> String {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    /// Asserts that `content` yields exactly one warning, on line 1, whose
    /// message quotes `emphasized`.
    fn assert_first_line_flagged(content: &str, emphasized: &str) {
        let result = check_with(DEFAULT_PUNCTUATION, content);

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
        assert!(
            result[0]
                .message
                .contains(&format!("Emphasis used instead of a heading: '{emphasized}'"))
        );
    }

    /// Asserts that `content` yields no warning with the default punctuation.
    fn assert_not_flagged(content: &str) {
        let result = check_with(DEFAULT_PUNCTUATION, content);
        assert_eq!(result.len(), 0);
    }

    /// Asserts that `fix` leaves `content` untouched with the default punctuation.
    fn assert_fix_is_noop(content: &str) {
        assert_eq!(fix_with(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_single_asterisk_emphasis() {
        assert_first_line_flagged("*This is emphasized*\n\nRegular text", "This is emphasized");
    }

    #[test]
    fn test_single_underscore_emphasis() {
        assert_first_line_flagged("_This is emphasized_\n\nRegular text", "This is emphasized");
    }

    #[test]
    fn test_double_asterisk_strong() {
        assert_first_line_flagged("**This is strong**\n\nRegular text", "This is strong");
    }

    #[test]
    fn test_double_underscore_strong() {
        assert_first_line_flagged("__This is strong__\n\nRegular text", "This is strong");
    }

    #[test]
    fn test_emphasis_with_punctuation() {
        // Emphasis with punctuation should NOT be flagged (matches markdownlint)
        assert_not_flagged("**Important Note:**\n\nRegular text");
    }

    #[test]
    fn test_emphasis_in_paragraph() {
        // Should not flag emphasis within a line
        assert_not_flagged("This is a paragraph with *emphasis* in the middle.");
    }

    #[test]
    fn test_emphasis_in_list() {
        // Should not flag emphasis in list items
        assert_not_flagged("- *List item with emphasis*\n- Another item");
    }

    #[test]
    fn test_emphasis_in_blockquote() {
        // Should not flag emphasis in blockquotes
        assert_not_flagged("> *Quote with emphasis*\n> Another line");
    }

    #[test]
    fn test_emphasis_in_code_block() {
        // Should not flag emphasis in code blocks
        assert_not_flagged("```\n*Not emphasis in code*\n```");
    }

    #[test]
    fn test_toc_label() {
        // Should not flag common TOC labels
        assert_not_flagged("**Table of Contents**\n\n- Item 1\n- Item 2");
    }

    #[test]
    fn test_already_heading() {
        // Should not flag emphasis that's already in a heading
        assert_not_flagged("# **Bold in heading**\n\nRegular text");
    }

    #[test]
    fn test_fix_no_changes() {
        // MD036 no longer provides automatic fixes
        assert_fix_is_noop("*Convert to heading*\n\nRegular text");
    }

    #[test]
    fn test_fix_preserves_content() {
        // MD036 no longer provides automatic fixes
        assert_fix_is_noop("**Convert to heading**\n\nRegular text");
    }

    #[test]
    fn test_empty_punctuation_config() {
        let content = "**Important Note:**\n\nRegular text";

        // With empty punctuation config, all emphasis is flagged
        let result = check_with("", content);
        assert_eq!(result.len(), 1);

        // MD036 no longer provides automatic fixes
        assert_eq!(fix_with("", content), content);
    }

    #[test]
    fn test_multiple_emphasized_lines() {
        let result = check_with(
            DEFAULT_PUNCTUATION,
            "*First heading*\n\nSome text\n\n**Second heading**\n\nMore text",
        );

        assert_eq!(result.len(), 2);
        assert_eq!(result[0].line, 1);
        assert_eq!(result[1].line, 5);
    }

    #[test]
    fn test_whitespace_handling() {
        let result = check_with(DEFAULT_PUNCTUATION, "  **Indented emphasis**  \n\nRegular text");

        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 1);
    }

    #[test]
    fn test_nested_emphasis() {
        // Nested emphasis (3 asterisks) should not match our patterns
        assert_not_flagged("***Not a simple emphasis***\n\nRegular text");
    }

    #[test]
    fn test_emphasis_with_newlines() {
        // Multi-line emphasis should not be flagged
        assert_not_flagged("*First line\nSecond line*\n\nRegular text");
    }

    #[test]
    fn test_fix_preserves_trailing_newline() {
        // MD036 no longer provides automatic fixes
        assert_fix_is_noop("*Convert to heading*\n");
    }

    #[test]
    fn test_default_config() {
        let rule = MD036NoEmphasisAsHeading::new(DEFAULT_PUNCTUATION.to_string());
        let (name, config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD036");

        let table = config.as_table().unwrap();
        assert_eq!(table.get("punctuation").unwrap().as_str().unwrap(), ".,;:!?");
    }

    #[test]
    fn test_image_caption_scenario() {
        // Test the specific issue from #23 - bold text used as image caption
        let content = "#### Métriques\n\n**commits par année : rumdl**\n\n![rumdl Commits By Year image](commits_by_year.png \"commits par année : rumdl\")";
        let result = check_with(DEFAULT_PUNCTUATION, content);

        // Should detect the bold text even though it's followed by an image
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].line, 3);
        assert!(result[0].message.contains("commits par année : rumdl"));

        // But should NOT provide a fix
        assert!(result[0].fix.is_none());

        // And the fix method should return unchanged content
        assert_eq!(fix_with(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_bold_with_colon_no_punctuation_config() {
        // Test that with empty punctuation config, even text ending with colon is flagged
        let result = check_with("", "**commits par année : rumdl**\n\nSome text");

        // With empty punctuation config, this should be flagged
        assert_eq!(result.len(), 1);
        assert!(result[0].fix.is_none());
    }

    #[test]
    fn test_bold_with_colon_default_config() {
        // Test that with default punctuation config, text ending with colon is NOT flagged
        // With default punctuation including colon, this should NOT be flagged
        assert_not_flagged("**Important Note:**\n\nSome text");
    }
}