rumdl_lib/rules/
md036_no_emphasis_only_first.rs

1//!
2//! Rule MD036: No emphasis used as a heading
3//!
4//! See [docs/md036.md](../../docs/md036.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_emphasis_range;
8use regex::Regex;
9use std::sync::LazyLock;
10use toml;
11
12mod md036_config;
13use md036_config::MD036Config;
14
// Each pattern is compiled lazily, the first time it is used, and then reused
// for every subsequent line (`LazyLock` defers initialization until first access).
// Note: The content between emphasis markers must not contain `*`, `_` or a newline,
// which avoids matching nested emphasis like _**text**_ or **_text_**
// Whole line is `*text*`, with optional surrounding whitespace.
static RE_ASTERISK_SINGLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*([^*_\n]+)\*\s*$").unwrap());
// Whole line is `_text_`, with optional surrounding whitespace.
static RE_UNDERSCORE_SINGLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*_([^*_\n]+)_\s*$").unwrap());
// Whole line is `**text**`, with optional surrounding whitespace.
static RE_ASTERISK_DOUBLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\*([^*_\n]+)\*\*\s*$").unwrap());
// Whole line is `__text__`, with optional surrounding whitespace.
static RE_UNDERSCORE_DOUBLE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*__([^*_\n]+)__\s*$").unwrap());
// Line starts with a list marker: `*`, `+`, `-` or digits followed by `.`, then whitespace.
static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*(?:[*+-]|\d+\.)\s+").unwrap());
// Line starts (after optional whitespace) with a `>` blockquote marker.
static BLOCKQUOTE_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*>").unwrap());
// Line starts with one or more `#` followed by a whitespace character.
static HEADING_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#+\s").unwrap());
// Line starts with `#`s and whitespace and contains an emphasis delimiter somewhere after them.
static HEADING_WITH_EMPHASIS: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(#+\s+).*(?:\*\*|\*|__|_)").unwrap());
// Pattern to match common Table of Contents labels that should not be converted to headings.
// The opening and closing delimiters are matched independently, so they need not be the same.
static TOC_LABEL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"^\s*(?:\*\*|\*|__|_)(?:Table of Contents|Contents|TOC|Index)(?:\*\*|\*|__|_)\s*$").unwrap()
});
30
/// Rule MD036: Emphasis used instead of a heading.
///
/// Reports lines that consist solely of emphasized (`*text*`, `_text_`) or
/// strong (`**text**`, `__text__`) text, since such lines are typically
/// standing in for a real heading.
#[derive(Clone, Default)]
pub struct MD036NoEmphasisAsHeading {
    // Rule configuration; carries the set of trailing punctuation characters
    // that exempt an emphasized line from being reported.
    config: MD036Config,
}
36
37impl MD036NoEmphasisAsHeading {
38    pub fn new(punctuation: String) -> Self {
39        Self {
40            config: MD036Config { punctuation },
41        }
42    }
43
44    pub fn from_config_struct(config: MD036Config) -> Self {
45        Self { config }
46    }
47
48    fn ends_with_punctuation(&self, text: &str) -> bool {
49        if text.is_empty() {
50            return false;
51        }
52        let trimmed = text.trim();
53        if trimmed.is_empty() {
54            return false;
55        }
56        // Check if the last character is in the punctuation set
57        trimmed
58            .chars()
59            .last()
60            .is_some_and(|ch| self.config.punctuation.contains(ch))
61    }
62
63    fn contains_link_or_code(&self, text: &str) -> bool {
64        // Check for inline code: `code`
65        // This is simple but effective since we're checking text that's already
66        // been identified as emphasized content
67        if text.contains('`') {
68            return true;
69        }
70
71        // Check for markdown links: [text](url) or [text][ref]
72        // We need both [ and ] for it to be a potential link
73        // and either ( ) for inline links or ][ for reference links
74        if text.contains('[') && text.contains(']') {
75            // Check for inline link pattern [...](...)
76            if text.contains("](") {
77                return true;
78            }
79            // Check for reference link pattern [...][...] or [...][]
80            if text.contains("][") || text.ends_with(']') {
81                return true;
82            }
83        }
84
85        false
86    }
87
88    fn is_entire_line_emphasized(
89        &self,
90        line: &str,
91        ctx: &crate::lint_context::LintContext,
92        line_num: usize,
93    ) -> Option<(usize, String, usize, usize)> {
94        let original_line = line;
95        let line = line.trim();
96
97        // Fast path for empty lines and lines that don't contain emphasis markers
98        if line.is_empty() || (!line.contains('*') && !line.contains('_')) {
99            return None;
100        }
101
102        // Skip if line is already a heading (but not a heading with emphasis)
103        if HEADING_MARKER.is_match(line) && !HEADING_WITH_EMPHASIS.is_match(line) {
104            return None;
105        }
106
107        // Skip if line is a Table of Contents label (common legitimate use of bold text)
108        if TOC_LABEL_PATTERN.is_match(line) {
109            return None;
110        }
111
112        // Skip if line is in a list, blockquote, or code block
113        if LIST_MARKER.is_match(line)
114            || BLOCKQUOTE_MARKER.is_match(line)
115            || ctx.line_info(line_num + 1).is_some_and(|info| info.in_code_block)
116        // line_num is 0-based, but LintContext expects 1-based
117        {
118            return None;
119        }
120
121        // Helper closure to check common conditions for all emphasis patterns
122        let check_emphasis = |text: &str, level: usize, pattern: String| -> Option<(usize, String, usize, usize)> {
123            // Check if text ends with punctuation - if so, don't flag it
124            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
125                return None;
126            }
127            // Skip if text contains links or inline code (matches markdownlint behavior)
128            // In markdownlint, these would be multiple tokens and thus not flagged
129            if self.contains_link_or_code(text) {
130                return None;
131            }
132            // Find position in original line by looking for the emphasis pattern
133            let start_pos = original_line.find(&pattern).unwrap_or(0);
134            let end_pos = start_pos + pattern.len();
135            Some((level, text.to_string(), start_pos, end_pos))
136        };
137
138        // Check for *emphasis* pattern (entire line)
139        if let Some(caps) = RE_ASTERISK_SINGLE.captures(line) {
140            let text = caps.get(1).unwrap().as_str();
141            let pattern = format!("*{text}*");
142            return check_emphasis(text, 1, pattern);
143        }
144
145        // Check for _emphasis_ pattern (entire line)
146        if let Some(caps) = RE_UNDERSCORE_SINGLE.captures(line) {
147            let text = caps.get(1).unwrap().as_str();
148            let pattern = format!("_{text}_");
149            return check_emphasis(text, 1, pattern);
150        }
151
152        // Check for **strong** pattern (entire line)
153        if let Some(caps) = RE_ASTERISK_DOUBLE.captures(line) {
154            let text = caps.get(1).unwrap().as_str();
155            let pattern = format!("**{text}**");
156            return check_emphasis(text, 2, pattern);
157        }
158
159        // Check for __strong__ pattern (entire line)
160        if let Some(caps) = RE_UNDERSCORE_DOUBLE.captures(line) {
161            let text = caps.get(1).unwrap().as_str();
162            let pattern = format!("__{text}__");
163            return check_emphasis(text, 2, pattern);
164        }
165
166        None
167    }
168}
169
170impl Rule for MD036NoEmphasisAsHeading {
171    fn name(&self) -> &'static str {
172        "MD036"
173    }
174
175    fn description(&self) -> &'static str {
176        "Emphasis should not be used instead of a heading"
177    }
178
179    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
180        let content = ctx.content;
181        // Fast path for empty content or content without emphasis markers
182        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
183            return Ok(Vec::new());
184        }
185
186        let mut warnings = Vec::new();
187
188        for (i, line) in content.lines().enumerate() {
189            // Skip obvious non-matches quickly
190            if line.trim().is_empty() || (!line.contains('*') && !line.contains('_')) {
191                continue;
192            }
193
194            if let Some((_level, text, start_pos, end_pos)) = self.is_entire_line_emphasized(line, ctx, i) {
195                let (start_line, start_col, end_line, end_col) =
196                    calculate_emphasis_range(i + 1, line, start_pos, end_pos);
197
198                warnings.push(LintWarning {
199                    rule_name: Some(self.name().to_string()),
200                    line: start_line,
201                    column: start_col,
202                    end_line,
203                    end_column: end_col,
204                    message: format!("Emphasis used instead of a heading: '{text}'"),
205                    severity: Severity::Warning,
206                    fix: None, // No automatic fix - too risky to convert to heading
207                });
208            }
209        }
210
211        Ok(warnings)
212    }
213
214    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
215        // MD036 does not provide automatic fixes
216        // Converting bold text to headings is too risky and can corrupt documents
217        // Users should manually decide if bold text should be a heading
218        Ok(ctx.content.to_string())
219    }
220
221    /// Check if this rule should be skipped for performance
222    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
223        // Skip if content is empty or has no emphasis markers
224        ctx.content.is_empty() || !ctx.likely_has_emphasis()
225    }
226
227    fn as_any(&self) -> &dyn std::any::Any {
228        self
229    }
230
231    fn default_config_section(&self) -> Option<(String, toml::Value)> {
232        let mut map = toml::map::Map::new();
233        map.insert(
234            "punctuation".to_string(),
235            toml::Value::String(self.config.punctuation.clone()),
236        );
237        Some((self.name().to_string(), toml::Value::Table(map)))
238    }
239
240    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
241    where
242        Self: Sized,
243    {
244        let punctuation = crate::config::get_rule_config_value::<String>(config, "MD036", "punctuation")
245            .unwrap_or_else(|| ".,;:!?".to_string());
246
247        Box::new(MD036NoEmphasisAsHeading::new(punctuation))
248    }
249}
250
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Punctuation set used by most tests.
    const DEFAULT_PUNCTUATION: &str = ".,;:!?";

    /// Runs `check` on `content` with a rule configured with `punctuation`.
    fn lint(punctuation: &str, content: &str) -> Vec<LintWarning> {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap()
    }

    /// Runs `fix` on `content` with a rule configured with `punctuation`.
    fn apply_fix(punctuation: &str, content: &str) -> String {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    #[test]
    fn test_single_asterisk_emphasis() {
        let warnings = lint(DEFAULT_PUNCTUATION, "*This is emphasized*\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_single_underscore_emphasis() {
        let warnings = lint(DEFAULT_PUNCTUATION, "_This is emphasized_\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is emphasized'")
        );
    }

    #[test]
    fn test_double_asterisk_strong() {
        let warnings = lint(DEFAULT_PUNCTUATION, "**This is strong**\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_double_underscore_strong() {
        let warnings = lint(DEFAULT_PUNCTUATION, "__This is strong__\n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(
            warnings[0]
                .message
                .contains("Emphasis used instead of a heading: 'This is strong'")
        );
    }

    #[test]
    fn test_emphasis_with_punctuation() {
        // Emphasis ending with configured punctuation is not reported (matches markdownlint).
        let warnings = lint(DEFAULT_PUNCTUATION, "**Important Note:**\n\nRegular text");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_emphasis_in_paragraph() {
        // Emphasis in the middle of a line is not a heading substitute.
        let warnings = lint(
            DEFAULT_PUNCTUATION,
            "This is a paragraph with *emphasis* in the middle.",
        );
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_emphasis_in_list() {
        // List items are exempt.
        let warnings = lint(DEFAULT_PUNCTUATION, "- *List item with emphasis*\n- Another item");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_emphasis_in_blockquote() {
        // Blockquotes are exempt.
        let warnings = lint(DEFAULT_PUNCTUATION, "> *Quote with emphasis*\n> Another line");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_emphasis_in_code_block() {
        // Content of fenced code blocks is exempt.
        let warnings = lint(DEFAULT_PUNCTUATION, "```\n*Not emphasis in code*\n```");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_toc_label() {
        // Common Table of Contents labels are exempt.
        let warnings = lint(DEFAULT_PUNCTUATION, "**Table of Contents**\n\n- Item 1\n- Item 2");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_already_heading() {
        // Emphasis inside an actual heading is not reported.
        let warnings = lint(DEFAULT_PUNCTUATION, "# **Bold in heading**\n\nRegular text");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_fix_no_changes() {
        // MD036 provides no automatic fix, so the content comes back untouched.
        let content = "*Convert to heading*\n\nRegular text";
        assert_eq!(apply_fix(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_fix_preserves_content() {
        // MD036 provides no automatic fix, so the content comes back untouched.
        let content = "**Convert to heading**\n\nRegular text";
        assert_eq!(apply_fix(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_empty_punctuation_config() {
        let content = "**Important Note:**\n\nRegular text";

        // Without a punctuation set, the trailing colon no longer exempts the line.
        assert_eq!(lint("", content).len(), 1);

        // The fix is still a no-op.
        assert_eq!(apply_fix("", content), content);
    }

    #[test]
    fn test_multiple_emphasized_lines() {
        let warnings = lint(
            DEFAULT_PUNCTUATION,
            "*First heading*\n\nSome text\n\n**Second heading**\n\nMore text",
        );

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 5);
    }

    #[test]
    fn test_whitespace_handling() {
        // Whitespace around the emphasis does not hide it.
        let warnings = lint(DEFAULT_PUNCTUATION, "  **Indented emphasis**  \n\nRegular text");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn test_nested_emphasis() {
        // Three asterisks (nested emphasis) match none of the patterns.
        let warnings = lint(DEFAULT_PUNCTUATION, "***Not a simple emphasis***\n\nRegular text");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_emphasis_with_newlines() {
        // Emphasis spanning several lines is not reported.
        let warnings = lint(DEFAULT_PUNCTUATION, "*First line\nSecond line*\n\nRegular text");
        assert!(warnings.is_empty());
    }

    #[test]
    fn test_fix_preserves_trailing_newline() {
        // MD036 provides no automatic fix, so even the trailing newline is kept.
        let content = "*Convert to heading*\n";
        assert_eq!(apply_fix(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_default_config() {
        let rule = MD036NoEmphasisAsHeading::new(DEFAULT_PUNCTUATION.to_string());
        let (section_name, section) = rule.default_config_section().unwrap();
        assert_eq!(section_name, "MD036");

        let table = section.as_table().unwrap();
        let punctuation = table.get("punctuation").and_then(|value| value.as_str());
        assert_eq!(punctuation, Some(".,;:!?"));
    }

    #[test]
    fn test_image_caption_scenario() {
        // Scenario from issue #23: bold text used as an image caption.
        let content = "#### Métriques\n\n**commits par année : rumdl**\n\n![rumdl Commits By Year image](commits_by_year.png \"commits par année : rumdl\")";
        let warnings = lint(DEFAULT_PUNCTUATION, content);

        // The bold line is detected even though an image follows it.
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3);
        assert!(warnings[0].message.contains("commits par année : rumdl"));

        // The warning carries no fix.
        assert!(warnings[0].fix.is_none());

        // And the fix method returns the content unchanged.
        assert_eq!(apply_fix(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_bold_with_colon_no_punctuation_config() {
        // With an empty punctuation config, bold text containing a colon is reported.
        let warnings = lint("", "**commits par année : rumdl**\n\nSome text");

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn test_bold_with_colon_default_config() {
        // The default punctuation set includes the colon, so this line is exempt.
        let warnings = lint(DEFAULT_PUNCTUATION, "**Important Note:**\n\nSome text");
        assert!(warnings.is_empty());
    }
}