rumdl_lib/rules/
md036_no_emphasis_only_first.rs

1//!
2//! Rule MD036: No emphasis used as a heading
3//!
4//! See [docs/md036.md](../../docs/md036.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::document_structure::DocumentStructure;
8use crate::utils::range_utils::calculate_emphasis_range;
9use lazy_static::lazy_static;
10use regex::Regex;
11use toml;
12
13mod md036_config;
14use md036_config::MD036Config;
15
lazy_static! {
    // Every pattern is compiled lazily on first use and then reused for all later checks.
    //
    // Whole-line emphasis patterns. The content between emphasis markers must not contain
    // other emphasis markers, to avoid matching nested emphasis like _**text**_ or **_text_**.
    // Capture group 1 is the text between the markers.
    static ref RE_ASTERISK_SINGLE: Regex = Regex::new(r"^\s*\*([^*_\n]+)\*\s*$").unwrap();
    static ref RE_UNDERSCORE_SINGLE: Regex = Regex::new(r"^\s*_([^*_\n]+)_\s*$").unwrap();
    static ref RE_ASTERISK_DOUBLE: Regex = Regex::new(r"^\s*\*\*([^*_\n]+)\*\*\s*$").unwrap();
    static ref RE_UNDERSCORE_DOUBLE: Regex = Regex::new(r"^\s*__([^*_\n]+)__\s*$").unwrap();
    // Optional indentation, then a bullet (`*`, `+`, `-`) or `<digits>.`, then whitespace
    static ref LIST_MARKER: Regex = Regex::new(r"^\s*(?:[*+-]|\d+\.)\s+").unwrap();
    // Optional indentation followed by `>`
    static ref BLOCKQUOTE_MARKER: Regex = Regex::new(r"^\s*>").unwrap();
    // Optional indentation followed by three or more backticks or tildes
    // NOTE(review): not referenced anywhere in the visible part of this file
    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
    // One or more `#` at the start of the line followed by a whitespace character
    static ref HEADING_MARKER: Regex = Regex::new(r"^#+\s").unwrap();
    // A `#` heading prefix followed, anywhere later on the line, by an emphasis marker
    static ref HEADING_WITH_EMPHASIS: Regex = Regex::new(r"^(#+\s+).*(?:\*\*|\*|__|_)").unwrap();
    // Common Table of Contents labels wrapped in emphasis markers; lines matching this
    // pattern are exempt from the rule
    static ref TOC_LABEL_PATTERN: Regex = Regex::new(r"^\s*(?:\*\*|\*|__|_)(?:Table of Contents|Contents|TOC|Index)(?:\*\*|\*|__|_)\s*$").unwrap();
}
32
/// Rule MD036: Emphasis used instead of a heading
///
/// Reports lines that consist solely of emphasized (`*text*`, `_text_`) or
/// strong (`**text**`, `__text__`) text.
#[derive(Clone, Default)]
pub struct MD036NoEmphasisAsHeading {
    // Rule settings; `punctuation` holds the trailing characters that exempt a line
    config: MD036Config,
}
38
39impl MD036NoEmphasisAsHeading {
40    pub fn new(punctuation: String) -> Self {
41        Self {
42            config: MD036Config { punctuation },
43        }
44    }
45
46    pub fn from_config_struct(config: MD036Config) -> Self {
47        Self { config }
48    }
49
50    fn ends_with_punctuation(&self, text: &str) -> bool {
51        if text.is_empty() {
52            return false;
53        }
54        let trimmed = text.trim();
55        if trimmed.is_empty() {
56            return false;
57        }
58        // Check if the last character is in the punctuation set
59        trimmed
60            .chars()
61            .last()
62            .is_some_and(|ch| self.config.punctuation.contains(ch))
63    }
64
65    fn contains_link_or_code(&self, text: &str) -> bool {
66        // Check for inline code: `code`
67        // This is simple but effective since we're checking text that's already
68        // been identified as emphasized content
69        if text.contains('`') {
70            return true;
71        }
72
73        // Check for markdown links: [text](url) or [text][ref]
74        // We need both [ and ] for it to be a potential link
75        // and either ( ) for inline links or ][ for reference links
76        if text.contains('[') && text.contains(']') {
77            // Check for inline link pattern [...](...)
78            if text.contains("](") {
79                return true;
80            }
81            // Check for reference link pattern [...][...] or [...][]
82            if text.contains("][") || text.ends_with(']') {
83                return true;
84            }
85        }
86
87        false
88    }
89
90    fn is_entire_line_emphasized(
91        &self,
92        line: &str,
93        doc_structure: &DocumentStructure,
94        line_num: usize,
95    ) -> Option<(usize, String, usize, usize)> {
96        let original_line = line;
97        let line = line.trim();
98
99        // Fast path for empty lines and lines that don't contain emphasis markers
100        if line.is_empty() || (!line.contains('*') && !line.contains('_')) {
101            return None;
102        }
103
104        // Skip if line is already a heading (but not a heading with emphasis)
105        if HEADING_MARKER.is_match(line) && !HEADING_WITH_EMPHASIS.is_match(line) {
106            return None;
107        }
108
109        // Skip if line is a Table of Contents label (common legitimate use of bold text)
110        if TOC_LABEL_PATTERN.is_match(line) {
111            return None;
112        }
113
114        // Skip if line is in a list, blockquote, or code block using DocumentStructure
115        if LIST_MARKER.is_match(line)
116            || BLOCKQUOTE_MARKER.is_match(line)
117            || doc_structure.is_in_code_block(line_num + 1)
118        // line_num is 0-based, but DocumentStructure expects 1-based
119        {
120            return None;
121        }
122
123        // Helper closure to check common conditions for all emphasis patterns
124        let check_emphasis = |text: &str, level: usize, pattern: String| -> Option<(usize, String, usize, usize)> {
125            // Check if text ends with punctuation - if so, don't flag it
126            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
127                return None;
128            }
129            // Skip if text contains links or inline code (matches markdownlint behavior)
130            // In markdownlint, these would be multiple tokens and thus not flagged
131            if self.contains_link_or_code(text) {
132                return None;
133            }
134            // Find position in original line by looking for the emphasis pattern
135            let start_pos = original_line.find(&pattern).unwrap_or(0);
136            let end_pos = start_pos + pattern.len();
137            Some((level, text.to_string(), start_pos, end_pos))
138        };
139
140        // Check for *emphasis* pattern (entire line)
141        if let Some(caps) = RE_ASTERISK_SINGLE.captures(line) {
142            let text = caps.get(1).unwrap().as_str();
143            let pattern = format!("*{text}*");
144            return check_emphasis(text, 1, pattern);
145        }
146
147        // Check for _emphasis_ pattern (entire line)
148        if let Some(caps) = RE_UNDERSCORE_SINGLE.captures(line) {
149            let text = caps.get(1).unwrap().as_str();
150            let pattern = format!("_{text}_");
151            return check_emphasis(text, 1, pattern);
152        }
153
154        // Check for **strong** pattern (entire line)
155        if let Some(caps) = RE_ASTERISK_DOUBLE.captures(line) {
156            let text = caps.get(1).unwrap().as_str();
157            let pattern = format!("**{text}**");
158            return check_emphasis(text, 2, pattern);
159        }
160
161        // Check for __strong__ pattern (entire line)
162        if let Some(caps) = RE_UNDERSCORE_DOUBLE.captures(line) {
163            let text = caps.get(1).unwrap().as_str();
164            let pattern = format!("__{text}__");
165            return check_emphasis(text, 2, pattern);
166        }
167
168        None
169    }
170}
171
172impl Rule for MD036NoEmphasisAsHeading {
173    fn name(&self) -> &'static str {
174        "MD036"
175    }
176
177    fn description(&self) -> &'static str {
178        "Emphasis should not be used instead of a heading"
179    }
180
181    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
182        let content = ctx.content;
183        // Fast path for empty content or content without emphasis markers
184        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
185            return Ok(Vec::new());
186        }
187
188        // Use the optimized document structure approach
189        let doc_structure = DocumentStructure::new(content);
190        self.check_with_structure(ctx, &doc_structure)
191    }
192
193    /// Optimized check using pre-computed document structure
194    fn check_with_structure(
195        &self,
196        ctx: &crate::lint_context::LintContext,
197        doc_structure: &DocumentStructure,
198    ) -> LintResult {
199        let content = ctx.content;
200        // Fast path for empty content or content without emphasis markers
201        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
202            return Ok(Vec::new());
203        }
204
205        let mut warnings = Vec::new();
206
207        for (i, line) in content.lines().enumerate() {
208            // Skip obvious non-matches quickly
209            if line.trim().is_empty() || (!line.contains('*') && !line.contains('_')) {
210                continue;
211            }
212
213            if let Some((_level, text, start_pos, end_pos)) = self.is_entire_line_emphasized(line, doc_structure, i) {
214                let (start_line, start_col, end_line, end_col) =
215                    calculate_emphasis_range(i + 1, line, start_pos, end_pos);
216
217                warnings.push(LintWarning {
218                    rule_name: Some(self.name()),
219                    line: start_line,
220                    column: start_col,
221                    end_line,
222                    end_column: end_col,
223                    message: format!("Emphasis used instead of a heading: '{text}'"),
224                    severity: Severity::Warning,
225                    fix: None, // No automatic fix - too risky to convert to heading
226                });
227            }
228        }
229
230        Ok(warnings)
231    }
232
233    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
234        // MD036 does not provide automatic fixes
235        // Converting bold text to headings is too risky and can corrupt documents
236        // Users should manually decide if bold text should be a heading
237        Ok(ctx.content.to_string())
238    }
239
240    /// Check if this rule should be skipped for performance
241    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
242        // Skip if content is empty or has no emphasis markers
243        ctx.content.is_empty() || (!ctx.content.contains('*') && !ctx.content.contains('_'))
244    }
245
246    fn as_any(&self) -> &dyn std::any::Any {
247        self
248    }
249
250    fn default_config_section(&self) -> Option<(String, toml::Value)> {
251        let mut map = toml::map::Map::new();
252        map.insert(
253            "punctuation".to_string(),
254            toml::Value::String(self.config.punctuation.clone()),
255        );
256        Some((self.name().to_string(), toml::Value::Table(map)))
257    }
258
259    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
260    where
261        Self: Sized,
262    {
263        let punctuation = crate::config::get_rule_config_value::<String>(config, "MD036", "punctuation")
264            .unwrap_or_else(|| ".,;:!?".to_string());
265
266        Box::new(MD036NoEmphasisAsHeading::new(punctuation))
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Punctuation set used by most tests.
    const PUNCTUATION: &str = ".,;:!?";

    /// Runs `check` on `content` with a rule configured with `punctuation`.
    fn lint(punctuation: &str, content: &str) -> Vec<LintWarning> {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` with a rule configured with `punctuation`.
    fn apply_fix(punctuation: &str, content: &str) -> String {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_single_asterisk_emphasis() {
        let warnings = lint(PUNCTUATION, "*This is emphasized*\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_single_underscore_emphasis() {
        let warnings = lint(PUNCTUATION, "_This is emphasized_\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_double_asterisk_strong() {
        let warnings = lint(PUNCTUATION, "**This is strong**\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_double_underscore_strong() {
        let warnings = lint(PUNCTUATION, "__This is strong__\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_emphasis_with_punctuation() {
        // Emphasis ending with configured punctuation is NOT flagged (matches markdownlint)
        let warnings = lint(PUNCTUATION, "**Important Note:**\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_paragraph() {
        // Emphasis inside a longer line is not a heading substitute
        let warnings = lint(PUNCTUATION, "This is a paragraph with *emphasis* in the middle.");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_list() {
        // List items are exempt
        let warnings = lint(PUNCTUATION, "- *List item with emphasis*\n- Another item");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_blockquote() {
        // Blockquotes are exempt
        let warnings = lint(PUNCTUATION, "> *Quote with emphasis*\n> Another line");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_code_block() {
        // Code block content is exempt
        let warnings = lint(PUNCTUATION, "```\n*Not emphasis in code*\n```");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_toc_label() {
        // Common TOC labels are exempt
        let warnings = lint(PUNCTUATION, "**Table of Contents**\n\n- Item 1\n- Item 2");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_already_heading() {
        // Emphasis that is already inside a heading is not flagged
        let warnings = lint(PUNCTUATION, "# **Bold in heading**\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_no_changes() {
        // MD036 no longer provides automatic fixes
        let content = "*Convert to heading*\n\nRegular text";
        assert_eq!(apply_fix(PUNCTUATION, content), content);
    }

    #[test]
    fn test_fix_preserves_content() {
        // MD036 no longer provides automatic fixes
        let content = "**Convert to heading**\n\nRegular text";
        assert_eq!(apply_fix(PUNCTUATION, content), content);
    }

    #[test]
    fn test_empty_punctuation_config() {
        let content = "**Important Note:**\n\nRegular text";

        // With an empty punctuation set, trailing punctuation no longer exempts the line
        assert_eq!(lint("", content).len(), 1);

        // MD036 no longer provides automatic fixes
        assert_eq!(apply_fix("", content), content);
    }

    #[test]
    fn test_multiple_emphasized_lines() {
        let warnings = lint(
            PUNCTUATION,
            "*First heading*\n\nSome text\n\n**Second heading**\n\nMore text",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 5);
    }

    #[test]
    fn test_whitespace_handling() {
        // Whitespace around the emphasized span does not hide it
        let warnings = lint(PUNCTUATION, "  **Indented emphasis**  \n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn test_nested_emphasis() {
        // Nested emphasis (3 asterisks) should not match our patterns
        let warnings = lint(PUNCTUATION, "***Not a simple emphasis***\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_with_newlines() {
        // Multi-line emphasis should not be flagged
        let warnings = lint(PUNCTUATION, "*First line\nSecond line*\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_preserves_trailing_newline() {
        // MD036 no longer provides automatic fixes
        let content = "*Convert to heading*\n";
        assert_eq!(apply_fix(PUNCTUATION, content), content);
    }

    #[test]
    fn test_default_config() {
        let rule = MD036NoEmphasisAsHeading::new(PUNCTUATION.to_string());
        let (name, config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD036");

        let table = config.as_table().unwrap();
        assert_eq!(table.get("punctuation").unwrap().as_str().unwrap(), ".,;:!?");
    }

    #[test]
    fn test_image_caption_scenario() {
        // Test the specific issue from #23 - bold text used as image caption
        let content = "#### Métriques\n\n**commits par année : rumdl**\n\n![rumdl Commits By Year image](commits_by_year.png \"commits par année : rumdl\")";
        let warnings = lint(PUNCTUATION, content);

        // The bold line is detected even though it is followed by an image
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3);
        assert!(warnings[0].message.contains("commits par année : rumdl"));

        // But no fix is offered on the warning
        assert!(warnings[0].fix.is_none());

        // And the fix method returns the content unchanged
        assert_eq!(apply_fix(PUNCTUATION, content), content);
    }

    #[test]
    fn test_bold_with_colon_no_punctuation_config() {
        // Bold text containing a colon (but not ending with one) is flagged when the
        // punctuation set is empty
        let warnings = lint("", "**commits par année : rumdl**\n\nSome text");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn test_bold_with_colon_default_config() {
        // With the default punctuation set (which includes the colon), text ending with a
        // colon is NOT flagged
        let warnings = lint(PUNCTUATION, "**Important Note:**\n\nSome text");
        assert_eq!(warnings.len(), 0);
    }
}