rumdl_lib/rules/
md036_no_emphasis_only_first.rs

1//!
2//! Rule MD036: No emphasis used as a heading
3//!
4//! See [docs/md036.md](../../docs/md036.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::document_structure::DocumentStructure;
8use crate::utils::range_utils::calculate_emphasis_range;
9use lazy_static::lazy_static;
10use regex::Regex;
11use toml;
12
13mod md036_config;
14use md036_config::MD036Config;
15
lazy_static! {
    // Each pattern is compiled lazily on first use and then reused for every line checked.

    // A line that is nothing but `*text*`; group 1 is the text (no `*` or newline inside).
    static ref RE_ASTERISK_SINGLE: Regex = Regex::new(r"^\s*\*([^*\n]+)\*\s*$").unwrap();
    // A line that is nothing but `_text_`; group 1 is the text (no `_` or newline inside).
    static ref RE_UNDERSCORE_SINGLE: Regex = Regex::new(r"^\s*_([^_\n]+)_\s*$").unwrap();
    // A line that is nothing but `**text**`; group 1 is the text (no `*` or newline inside).
    static ref RE_ASTERISK_DOUBLE: Regex = Regex::new(r"^\s*\*\*([^*\n]+)\*\*\s*$").unwrap();
    // A line that is nothing but `__text__`; group 1 is the text (no `_` or newline inside).
    static ref RE_UNDERSCORE_DOUBLE: Regex = Regex::new(r"^\s*__([^_\n]+)__\s*$").unwrap();
    // Line starting with a bullet (`*`, `+`, `-`) or an ordered marker (`1.`) followed by whitespace.
    static ref LIST_MARKER: Regex = Regex::new(r"^\s*(?:[*+-]|\d+\.)\s+").unwrap();
    // Line whose first non-whitespace character is `>`.
    static ref BLOCKQUOTE_MARKER: Regex = Regex::new(r"^\s*>").unwrap();
    // Optional indentation (group 1) followed by a fence of three or more backticks or tildes (group 2).
    // NOTE(review): not referenced anywhere in the code visible here — confirm whether it is still needed.
    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
    // Line starting with one or more `#` followed by whitespace (ATX heading prefix).
    static ref HEADING_MARKER: Regex = Regex::new(r"^#+\s").unwrap();
    // ATX heading prefix followed, anywhere later on the line, by an emphasis marker.
    static ref HEADING_WITH_EMPHASIS: Regex = Regex::new(r"^(#+\s+).*(?:\*\*|\*|__|_)").unwrap();
    // Pattern to match common Table of Contents labels that should not be converted to headings
    static ref TOC_LABEL_PATTERN: Regex = Regex::new(r"^\s*(?:\*\*|\*|__|_)(?:Table of Contents|Contents|TOC|Index)(?:\*\*|\*|__|_)\s*$").unwrap();
}
30
/// Rule MD036: Emphasis used instead of a heading
///
/// Flags lines that consist solely of emphasized (`*text*`, `_text_`) or strong
/// (`**text**`, `__text__`) text.
#[derive(Clone, Default)]
pub struct MD036NoEmphasisAsHeading {
    // Rule settings; `punctuation` lists the trailing characters that exempt a line from the rule.
    config: MD036Config,
}
36
37impl MD036NoEmphasisAsHeading {
38    pub fn new(punctuation: String) -> Self {
39        Self {
40            config: MD036Config { punctuation },
41        }
42    }
43
44    pub fn from_config_struct(config: MD036Config) -> Self {
45        Self { config }
46    }
47
48    fn ends_with_punctuation(&self, text: &str) -> bool {
49        if text.is_empty() {
50            return false;
51        }
52        let trimmed = text.trim();
53        if trimmed.is_empty() {
54            return false;
55        }
56        // Check if the last character is in the punctuation set
57        trimmed
58            .chars()
59            .last()
60            .is_some_and(|ch| self.config.punctuation.contains(ch))
61    }
62
63    fn is_entire_line_emphasized(
64        &self,
65        line: &str,
66        doc_structure: &DocumentStructure,
67        line_num: usize,
68    ) -> Option<(usize, String, usize, usize)> {
69        let original_line = line;
70        let line = line.trim();
71
72        // Fast path for empty lines and lines that don't contain emphasis markers
73        if line.is_empty() || (!line.contains('*') && !line.contains('_')) {
74            return None;
75        }
76
77        // Skip if line is already a heading (but not a heading with emphasis)
78        if HEADING_MARKER.is_match(line) && !HEADING_WITH_EMPHASIS.is_match(line) {
79            return None;
80        }
81
82        // Skip if line is a Table of Contents label (common legitimate use of bold text)
83        if TOC_LABEL_PATTERN.is_match(line) {
84            return None;
85        }
86
87        // Skip if line is in a list, blockquote, or code block using DocumentStructure
88        if LIST_MARKER.is_match(line)
89            || BLOCKQUOTE_MARKER.is_match(line)
90            || doc_structure.is_in_code_block(line_num + 1)
91        // line_num is 0-based, but DocumentStructure expects 1-based
92        {
93            return None;
94        }
95
96        // Check specific patterns directly without additional requirements
97        // Check for *emphasis* pattern (entire line)
98        if let Some(caps) = RE_ASTERISK_SINGLE.captures(line) {
99            let text = caps.get(1).unwrap().as_str();
100            // Check if text ends with punctuation - if so, don't flag it
101            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
102                return None;
103            }
104            let _full_match = caps.get(0).unwrap();
105            // Find position in original line by looking for the emphasis pattern
106            let pattern = format!("*{text}*");
107            let start_pos = original_line.find(&pattern).unwrap_or(0);
108            let end_pos = start_pos + pattern.len();
109            return Some((1, text.to_string(), start_pos, end_pos));
110        }
111
112        // Check for _emphasis_ pattern (entire line)
113        if let Some(caps) = RE_UNDERSCORE_SINGLE.captures(line) {
114            let text = caps.get(1).unwrap().as_str();
115            // Check if text ends with punctuation - if so, don't flag it
116            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
117                return None;
118            }
119            let _full_match = caps.get(0).unwrap();
120            // Find position in original line by looking for the emphasis pattern
121            let pattern = format!("_{text}_");
122            let start_pos = original_line.find(&pattern).unwrap_or(0);
123            let end_pos = start_pos + pattern.len();
124            return Some((1, text.to_string(), start_pos, end_pos));
125        }
126
127        // Check for **strong** pattern (entire line)
128        if let Some(caps) = RE_ASTERISK_DOUBLE.captures(line) {
129            let text = caps.get(1).unwrap().as_str();
130            // Check if text ends with punctuation - if so, don't flag it
131            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
132                return None;
133            }
134            let _full_match = caps.get(0).unwrap();
135            // Find position in original line by looking for the emphasis pattern
136            let pattern = format!("**{text}**");
137            let start_pos = original_line.find(&pattern).unwrap_or(0);
138            let end_pos = start_pos + pattern.len();
139            return Some((2, text.to_string(), start_pos, end_pos));
140        }
141
142        // Check for __strong__ pattern (entire line)
143        if let Some(caps) = RE_UNDERSCORE_DOUBLE.captures(line) {
144            let text = caps.get(1).unwrap().as_str();
145            // Check if text ends with punctuation - if so, don't flag it
146            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
147                return None;
148            }
149            let _full_match = caps.get(0).unwrap();
150            // Find position in original line by looking for the emphasis pattern
151            let pattern = format!("__{text}__");
152            let start_pos = original_line.find(&pattern).unwrap_or(0);
153            let end_pos = start_pos + pattern.len();
154            return Some((2, text.to_string(), start_pos, end_pos));
155        }
156
157        None
158    }
159}
160
161impl Rule for MD036NoEmphasisAsHeading {
162    fn name(&self) -> &'static str {
163        "MD036"
164    }
165
166    fn description(&self) -> &'static str {
167        "Emphasis should not be used instead of a heading"
168    }
169
170    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
171        let content = ctx.content;
172        // Fast path for empty content or content without emphasis markers
173        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
174            return Ok(Vec::new());
175        }
176
177        // Use the optimized document structure approach
178        let doc_structure = DocumentStructure::new(content);
179        self.check_with_structure(ctx, &doc_structure)
180    }
181
182    /// Optimized check using pre-computed document structure
183    fn check_with_structure(
184        &self,
185        ctx: &crate::lint_context::LintContext,
186        doc_structure: &DocumentStructure,
187    ) -> LintResult {
188        let content = ctx.content;
189        // Fast path for empty content or content without emphasis markers
190        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
191            return Ok(Vec::new());
192        }
193
194        let mut warnings = Vec::new();
195
196        for (i, line) in content.lines().enumerate() {
197            // Skip obvious non-matches quickly
198            if line.trim().is_empty() || (!line.contains('*') && !line.contains('_')) {
199                continue;
200            }
201
202            if let Some((_level, text, start_pos, end_pos)) = self.is_entire_line_emphasized(line, doc_structure, i) {
203                let (start_line, start_col, end_line, end_col) =
204                    calculate_emphasis_range(i + 1, line, start_pos, end_pos);
205
206                warnings.push(LintWarning {
207                    rule_name: Some(self.name()),
208                    line: start_line,
209                    column: start_col,
210                    end_line,
211                    end_column: end_col,
212                    message: format!("Emphasis used instead of a heading: '{text}'"),
213                    severity: Severity::Warning,
214                    fix: None, // No automatic fix - too risky to convert to heading
215                });
216            }
217        }
218
219        Ok(warnings)
220    }
221
222    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
223        // MD036 does not provide automatic fixes
224        // Converting bold text to headings is too risky and can corrupt documents
225        // Users should manually decide if bold text should be a heading
226        Ok(ctx.content.to_string())
227    }
228
229    /// Check if this rule should be skipped for performance
230    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
231        // Skip if content is empty or has no emphasis markers
232        ctx.content.is_empty() || (!ctx.content.contains('*') && !ctx.content.contains('_'))
233    }
234
235    fn as_any(&self) -> &dyn std::any::Any {
236        self
237    }
238
239    fn default_config_section(&self) -> Option<(String, toml::Value)> {
240        let mut map = toml::map::Map::new();
241        map.insert(
242            "punctuation".to_string(),
243            toml::Value::String(self.config.punctuation.clone()),
244        );
245        Some((self.name().to_string(), toml::Value::Table(map)))
246    }
247
248    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
249    where
250        Self: Sized,
251    {
252        let punctuation = crate::config::get_rule_config_value::<String>(config, "MD036", "punctuation")
253            .unwrap_or_else(|| ".,;:!?".to_string());
254
255        Box::new(MD036NoEmphasisAsHeading::new(punctuation))
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Punctuation set used by most tests.
    const PUNCTUATION: &str = ".,;:!?";

    /// Runs `check` on `content` with a rule built from `punctuation`.
    fn lint_with(punctuation: &str, content: &str) -> Vec<LintWarning> {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` with a rule built from `punctuation`.
    fn fix_with(punctuation: &str, content: &str) -> String {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_single_asterisk_emphasis() {
        let warnings = lint_with(PUNCTUATION, "*This is emphasized*\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_single_underscore_emphasis() {
        let warnings = lint_with(PUNCTUATION, "_This is emphasized_\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_double_asterisk_strong() {
        let warnings = lint_with(PUNCTUATION, "**This is strong**\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_double_underscore_strong() {
        let warnings = lint_with(PUNCTUATION, "__This is strong__\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_emphasis_with_punctuation() {
        // Emphasis ending with punctuation is not reported (matches markdownlint)
        let warnings = lint_with(PUNCTUATION, "**Important Note:**\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_paragraph() {
        // Emphasis embedded in a longer line is not reported
        let warnings = lint_with(PUNCTUATION, "This is a paragraph with *emphasis* in the middle.");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_list() {
        // Emphasis inside list items is not reported
        let warnings = lint_with(PUNCTUATION, "- *List item with emphasis*\n- Another item");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_blockquote() {
        // Emphasis inside blockquotes is not reported
        let warnings = lint_with(PUNCTUATION, "> *Quote with emphasis*\n> Another line");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_in_code_block() {
        // Emphasis-looking text inside code blocks is not reported
        let warnings = lint_with(PUNCTUATION, "```\n*Not emphasis in code*\n```");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_toc_label() {
        // Common TOC labels are not reported
        let warnings = lint_with(PUNCTUATION, "**Table of Contents**\n\n- Item 1\n- Item 2");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_already_heading() {
        // Emphasis that already sits inside a heading is not reported
        let warnings = lint_with(PUNCTUATION, "# **Bold in heading**\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_no_changes() {
        // MD036 no longer provides automatic fixes
        let content = "*Convert to heading*\n\nRegular text";
        assert_eq!(fix_with(PUNCTUATION, content), content);
    }

    #[test]
    fn test_fix_preserves_content() {
        // MD036 no longer provides automatic fixes
        let content = "**Convert to heading**\n\nRegular text";
        assert_eq!(fix_with(PUNCTUATION, content), content);
    }

    #[test]
    fn test_empty_punctuation_config() {
        let content = "**Important Note:**\n\nRegular text";

        // With an empty punctuation set, every emphasis-only line is reported
        assert_eq!(lint_with("", content).len(), 1);

        // MD036 no longer provides automatic fixes
        assert_eq!(fix_with("", content), content);
    }

    #[test]
    fn test_multiple_emphasized_lines() {
        let warnings = lint_with(
            PUNCTUATION,
            "*First heading*\n\nSome text\n\n**Second heading**\n\nMore text",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 5);
    }

    #[test]
    fn test_whitespace_handling() {
        let warnings = lint_with(PUNCTUATION, "  **Indented emphasis**  \n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn test_nested_emphasis() {
        // Three asterisks match none of the single/double patterns
        let warnings = lint_with(PUNCTUATION, "***Not a simple emphasis***\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_emphasis_with_newlines() {
        // Emphasis spanning several lines is not reported
        let warnings = lint_with(PUNCTUATION, "*First line\nSecond line*\n\nRegular text");
        assert_eq!(warnings.len(), 0);
    }

    #[test]
    fn test_fix_preserves_trailing_newline() {
        // MD036 no longer provides automatic fixes
        let content = "*Convert to heading*\n";
        assert_eq!(fix_with(PUNCTUATION, content), content);
    }

    #[test]
    fn test_default_config() {
        let rule = MD036NoEmphasisAsHeading::new(PUNCTUATION.to_string());
        let (name, config) = rule.default_config_section().unwrap();
        assert_eq!(name, "MD036");

        let table = config.as_table().unwrap();
        assert_eq!(table.get("punctuation").unwrap().as_str().unwrap(), ".,;:!?");
    }

    #[test]
    fn test_image_caption_scenario() {
        // Scenario from issue #23: bold text used as an image caption
        let content = "#### Métriques\n\n**commits par année : rumdl**\n\n![rumdl Commits By Year image](commits_by_year.png \"commits par année : rumdl\")";
        let warnings = lint_with(PUNCTUATION, content);

        // The bold line is reported even though an image follows it
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3);
        assert!(warnings[0].message.contains("commits par année : rumdl"));

        // No fix is attached to the warning
        assert!(warnings[0].fix.is_none());

        // The fix method leaves the content untouched
        assert_eq!(fix_with(PUNCTUATION, content), content);
    }

    #[test]
    fn test_bold_with_colon_no_punctuation_config() {
        // With an empty punctuation set, bold text containing a colon is reported
        let warnings = lint_with("", "**commits par année : rumdl**\n\nSome text");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn test_bold_with_colon_default_config() {
        // With the default punctuation set (which includes the colon), text ending
        // with a colon is not reported
        let warnings = lint_with(PUNCTUATION, "**Important Note:**\n\nSome text");
        assert_eq!(warnings.len(), 0);
    }
}