rumdl_lib/rules/
md036_no_emphasis_only_first.rs

//!
//! Rule MD036: Emphasis used instead of a heading
//!
//! See [docs/md036.md](../../docs/md036.md) for full documentation, configuration, and examples.
5
6use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
7use crate::utils::range_utils::calculate_emphasis_range;
8use lazy_static::lazy_static;
9use regex::Regex;
10use toml;
11
12mod md036_config;
13use md036_config::MD036Config;
14
15lazy_static! {
16    // Optimize regex patterns with compilation once at startup
17    // Note: The content between emphasis markers should not contain other emphasis markers
18    // to avoid matching nested emphasis like _**text**_ or **_text_**
19    static ref RE_ASTERISK_SINGLE: Regex = Regex::new(r"^\s*\*([^*_\n]+)\*\s*$").unwrap();
20    static ref RE_UNDERSCORE_SINGLE: Regex = Regex::new(r"^\s*_([^*_\n]+)_\s*$").unwrap();
21    static ref RE_ASTERISK_DOUBLE: Regex = Regex::new(r"^\s*\*\*([^*_\n]+)\*\*\s*$").unwrap();
22    static ref RE_UNDERSCORE_DOUBLE: Regex = Regex::new(r"^\s*__([^*_\n]+)__\s*$").unwrap();
23    static ref LIST_MARKER: Regex = Regex::new(r"^\s*(?:[*+-]|\d+\.)\s+").unwrap();
24    static ref BLOCKQUOTE_MARKER: Regex = Regex::new(r"^\s*>").unwrap();
25    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)(`{3,}|~{3,})").unwrap();
26    static ref HEADING_MARKER: Regex = Regex::new(r"^#+\s").unwrap();
27    static ref HEADING_WITH_EMPHASIS: Regex = Regex::new(r"^(#+\s+).*(?:\*\*|\*|__|_)").unwrap();
28    // Pattern to match common Table of Contents labels that should not be converted to headings
29    static ref TOC_LABEL_PATTERN: Regex = Regex::new(r"^\s*(?:\*\*|\*|__|_)(?:Table of Contents|Contents|TOC|Index)(?:\*\*|\*|__|_)\s*$").unwrap();
30}
31
32/// Rule MD036: Emphasis used instead of a heading
33#[derive(Clone, Default)]
34pub struct MD036NoEmphasisAsHeading {
35    config: MD036Config,
36}
37
38impl MD036NoEmphasisAsHeading {
39    pub fn new(punctuation: String) -> Self {
40        Self {
41            config: MD036Config { punctuation },
42        }
43    }
44
45    pub fn from_config_struct(config: MD036Config) -> Self {
46        Self { config }
47    }
48
49    fn ends_with_punctuation(&self, text: &str) -> bool {
50        if text.is_empty() {
51            return false;
52        }
53        let trimmed = text.trim();
54        if trimmed.is_empty() {
55            return false;
56        }
57        // Check if the last character is in the punctuation set
58        trimmed
59            .chars()
60            .last()
61            .is_some_and(|ch| self.config.punctuation.contains(ch))
62    }
63
64    fn contains_link_or_code(&self, text: &str) -> bool {
65        // Check for inline code: `code`
66        // This is simple but effective since we're checking text that's already
67        // been identified as emphasized content
68        if text.contains('`') {
69            return true;
70        }
71
72        // Check for markdown links: [text](url) or [text][ref]
73        // We need both [ and ] for it to be a potential link
74        // and either ( ) for inline links or ][ for reference links
75        if text.contains('[') && text.contains(']') {
76            // Check for inline link pattern [...](...)
77            if text.contains("](") {
78                return true;
79            }
80            // Check for reference link pattern [...][...] or [...][]
81            if text.contains("][") || text.ends_with(']') {
82                return true;
83            }
84        }
85
86        false
87    }
88
89    fn is_entire_line_emphasized(
90        &self,
91        line: &str,
92        ctx: &crate::lint_context::LintContext,
93        line_num: usize,
94    ) -> Option<(usize, String, usize, usize)> {
95        let original_line = line;
96        let line = line.trim();
97
98        // Fast path for empty lines and lines that don't contain emphasis markers
99        if line.is_empty() || (!line.contains('*') && !line.contains('_')) {
100            return None;
101        }
102
103        // Skip if line is already a heading (but not a heading with emphasis)
104        if HEADING_MARKER.is_match(line) && !HEADING_WITH_EMPHASIS.is_match(line) {
105            return None;
106        }
107
108        // Skip if line is a Table of Contents label (common legitimate use of bold text)
109        if TOC_LABEL_PATTERN.is_match(line) {
110            return None;
111        }
112
113        // Skip if line is in a list, blockquote, or code block
114        if LIST_MARKER.is_match(line) || BLOCKQUOTE_MARKER.is_match(line) || ctx.is_in_code_block(line_num + 1)
115        // line_num is 0-based, but LintContext expects 1-based
116        {
117            return None;
118        }
119
120        // Helper closure to check common conditions for all emphasis patterns
121        let check_emphasis = |text: &str, level: usize, pattern: String| -> Option<(usize, String, usize, usize)> {
122            // Check if text ends with punctuation - if so, don't flag it
123            if !self.config.punctuation.is_empty() && self.ends_with_punctuation(text) {
124                return None;
125            }
126            // Skip if text contains links or inline code (matches markdownlint behavior)
127            // In markdownlint, these would be multiple tokens and thus not flagged
128            if self.contains_link_or_code(text) {
129                return None;
130            }
131            // Find position in original line by looking for the emphasis pattern
132            let start_pos = original_line.find(&pattern).unwrap_or(0);
133            let end_pos = start_pos + pattern.len();
134            Some((level, text.to_string(), start_pos, end_pos))
135        };
136
137        // Check for *emphasis* pattern (entire line)
138        if let Some(caps) = RE_ASTERISK_SINGLE.captures(line) {
139            let text = caps.get(1).unwrap().as_str();
140            let pattern = format!("*{text}*");
141            return check_emphasis(text, 1, pattern);
142        }
143
144        // Check for _emphasis_ pattern (entire line)
145        if let Some(caps) = RE_UNDERSCORE_SINGLE.captures(line) {
146            let text = caps.get(1).unwrap().as_str();
147            let pattern = format!("_{text}_");
148            return check_emphasis(text, 1, pattern);
149        }
150
151        // Check for **strong** pattern (entire line)
152        if let Some(caps) = RE_ASTERISK_DOUBLE.captures(line) {
153            let text = caps.get(1).unwrap().as_str();
154            let pattern = format!("**{text}**");
155            return check_emphasis(text, 2, pattern);
156        }
157
158        // Check for __strong__ pattern (entire line)
159        if let Some(caps) = RE_UNDERSCORE_DOUBLE.captures(line) {
160            let text = caps.get(1).unwrap().as_str();
161            let pattern = format!("__{text}__");
162            return check_emphasis(text, 2, pattern);
163        }
164
165        None
166    }
167}
168
169impl Rule for MD036NoEmphasisAsHeading {
170    fn name(&self) -> &'static str {
171        "MD036"
172    }
173
174    fn description(&self) -> &'static str {
175        "Emphasis should not be used instead of a heading"
176    }
177
178    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
179        let content = ctx.content;
180        // Fast path for empty content or content without emphasis markers
181        if content.is_empty() || (!content.contains('*') && !content.contains('_')) {
182            return Ok(Vec::new());
183        }
184
185        let mut warnings = Vec::new();
186
187        for (i, line) in content.lines().enumerate() {
188            // Skip obvious non-matches quickly
189            if line.trim().is_empty() || (!line.contains('*') && !line.contains('_')) {
190                continue;
191            }
192
193            if let Some((_level, text, start_pos, end_pos)) = self.is_entire_line_emphasized(line, ctx, i) {
194                let (start_line, start_col, end_line, end_col) =
195                    calculate_emphasis_range(i + 1, line, start_pos, end_pos);
196
197                warnings.push(LintWarning {
198                    rule_name: Some(self.name()),
199                    line: start_line,
200                    column: start_col,
201                    end_line,
202                    end_column: end_col,
203                    message: format!("Emphasis used instead of a heading: '{text}'"),
204                    severity: Severity::Warning,
205                    fix: None, // No automatic fix - too risky to convert to heading
206                });
207            }
208        }
209
210        Ok(warnings)
211    }
212
213    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
214        // MD036 does not provide automatic fixes
215        // Converting bold text to headings is too risky and can corrupt documents
216        // Users should manually decide if bold text should be a heading
217        Ok(ctx.content.to_string())
218    }
219
220    /// Check if this rule should be skipped for performance
221    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
222        // Skip if content is empty or has no emphasis markers
223        ctx.content.is_empty() || !ctx.likely_has_emphasis()
224    }
225
226    fn as_any(&self) -> &dyn std::any::Any {
227        self
228    }
229
230    fn default_config_section(&self) -> Option<(String, toml::Value)> {
231        let mut map = toml::map::Map::new();
232        map.insert(
233            "punctuation".to_string(),
234            toml::Value::String(self.config.punctuation.clone()),
235        );
236        Some((self.name().to_string(), toml::Value::Table(map)))
237    }
238
239    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
240    where
241        Self: Sized,
242    {
243        let punctuation = crate::config::get_rule_config_value::<String>(config, "MD036", "punctuation")
244            .unwrap_or_else(|| ".,;:!?".to_string());
245
246        Box::new(MD036NoEmphasisAsHeading::new(punctuation))
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Punctuation set used by every test that does not override it.
    const DEFAULT_PUNCTUATION: &str = ".,;:!?";

    /// Runs `check` on `content` with a rule built from `punctuation`.
    fn run_check(punctuation: &str, content: &str) -> LintResult {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx)
    }

    /// Runs `fix` on `content` with a rule built from `punctuation`.
    fn run_fix(punctuation: &str, content: &str) -> String {
        let rule = MD036NoEmphasisAsHeading::new(punctuation.to_string());
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.fix(&ctx).unwrap()
    }

    /// Asserts that exactly one warning is raised on line 1 quoting `expected_text`.
    fn assert_flagged_on_first_line(content: &str, expected_text: &str) {
        let warnings = run_check(DEFAULT_PUNCTUATION, content).unwrap();
        let expected_message = format!("Emphasis used instead of a heading: '{expected_text}'");

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
        assert!(warnings[0].message.contains(&expected_message));
    }

    /// Asserts that the default configuration raises no warning for `content`.
    fn assert_not_flagged(content: &str) {
        let warnings = run_check(DEFAULT_PUNCTUATION, content).unwrap();
        assert_eq!(warnings.len(), 0);
    }

    /// Asserts that `fix` leaves `content` untouched under the default configuration.
    fn assert_fix_is_noop(content: &str) {
        assert_eq!(run_fix(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_single_asterisk_emphasis() {
        assert_flagged_on_first_line("*This is emphasized*\n\nRegular text", "This is emphasized");
    }

    #[test]
    fn test_single_underscore_emphasis() {
        assert_flagged_on_first_line("_This is emphasized_\n\nRegular text", "This is emphasized");
    }

    #[test]
    fn test_double_asterisk_strong() {
        assert_flagged_on_first_line("**This is strong**\n\nRegular text", "This is strong");
    }

    #[test]
    fn test_double_underscore_strong() {
        assert_flagged_on_first_line("__This is strong__\n\nRegular text", "This is strong");
    }

    #[test]
    fn test_emphasis_with_punctuation() {
        // Text ending in configured punctuation is exempt (matches markdownlint).
        assert_not_flagged("**Important Note:**\n\nRegular text");
    }

    #[test]
    fn test_emphasis_in_paragraph() {
        // Emphasis in the middle of a line is ordinary inline emphasis.
        assert_not_flagged("This is a paragraph with *emphasis* in the middle.");
    }

    #[test]
    fn test_emphasis_in_list() {
        // List items are never treated as heading substitutes.
        assert_not_flagged("- *List item with emphasis*\n- Another item");
    }

    #[test]
    fn test_emphasis_in_blockquote() {
        // Blockquote lines are never treated as heading substitutes.
        assert_not_flagged("> *Quote with emphasis*\n> Another line");
    }

    #[test]
    fn test_emphasis_in_code_block() {
        // Content of a fenced code block is not markdown emphasis.
        assert_not_flagged("```\n*Not emphasis in code*\n```");
    }

    #[test]
    fn test_toc_label() {
        // Common Table of Contents labels are exempt.
        assert_not_flagged("**Table of Contents**\n\n- Item 1\n- Item 2");
    }

    #[test]
    fn test_already_heading() {
        // Emphasis that already lives inside a heading is fine.
        assert_not_flagged("# **Bold in heading**\n\nRegular text");
    }

    #[test]
    fn test_fix_no_changes() {
        // MD036 provides no automatic fix.
        assert_fix_is_noop("*Convert to heading*\n\nRegular text");
    }

    #[test]
    fn test_fix_preserves_content() {
        // MD036 provides no automatic fix.
        assert_fix_is_noop("**Convert to heading**\n\nRegular text");
    }

    #[test]
    fn test_empty_punctuation_config() {
        let content = "**Important Note:**\n\nRegular text";

        // Without a punctuation set the trailing colon no longer exempts the line.
        let warnings = run_check("", content).unwrap();
        assert_eq!(warnings.len(), 1);

        // MD036 provides no automatic fix.
        assert_eq!(run_fix("", content), content);
    }

    #[test]
    fn test_multiple_emphasized_lines() {
        let content = "*First heading*\n\nSome text\n\n**Second heading**\n\nMore text";
        let warnings = run_check(DEFAULT_PUNCTUATION, content).unwrap();

        assert_eq!(warnings.len(), 2);
        assert_eq!(warnings[0].line, 1);
        assert_eq!(warnings[1].line, 5);
    }

    #[test]
    fn test_whitespace_handling() {
        // Leading and trailing whitespace around the span does not hide it.
        let warnings = run_check(DEFAULT_PUNCTUATION, "  **Indented emphasis**  \n\nRegular text").unwrap();

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 1);
    }

    #[test]
    fn test_nested_emphasis() {
        // Three asterisks (nested emphasis) match none of the patterns.
        assert_not_flagged("***Not a simple emphasis***\n\nRegular text");
    }

    #[test]
    fn test_emphasis_with_newlines() {
        // Emphasis spanning several lines is not flagged.
        assert_not_flagged("*First line\nSecond line*\n\nRegular text");
    }

    #[test]
    fn test_fix_preserves_trailing_newline() {
        // MD036 provides no automatic fix, so the trailing newline survives too.
        assert_fix_is_noop("*Convert to heading*\n");
    }

    #[test]
    fn test_default_config() {
        let rule = MD036NoEmphasisAsHeading::new(DEFAULT_PUNCTUATION.to_string());
        let (section_name, section) = rule.default_config_section().unwrap();
        assert_eq!(section_name, "MD036");

        let punctuation = section.as_table().unwrap().get("punctuation").unwrap();
        assert_eq!(punctuation.as_str().unwrap(), ".,;:!?");
    }

    #[test]
    fn test_image_caption_scenario() {
        // Regression test for issue #23: bold text used as an image caption.
        let content = "#### Métriques\n\n**commits par année : rumdl**\n\n![rumdl Commits By Year image](commits_by_year.png \"commits par année : rumdl\")";
        let warnings = run_check(DEFAULT_PUNCTUATION, content).unwrap();

        // The bold line is reported even though an image follows it...
        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].line, 3);
        assert!(warnings[0].message.contains("commits par année : rumdl"));

        // ...but no fix is attached to the warning...
        assert!(warnings[0].fix.is_none());

        // ...and `fix` returns the document unchanged.
        assert_eq!(run_fix(DEFAULT_PUNCTUATION, content), content);
    }

    #[test]
    fn test_bold_with_colon_no_punctuation_config() {
        // Bold text with a colon in the middle is flagged when the punctuation
        // set is empty, and the warning carries no fix.
        let warnings = run_check("", "**commits par année : rumdl**\n\nSome text").unwrap();

        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn test_bold_with_colon_default_config() {
        // The default punctuation set contains the colon, so a trailing colon
        // exempts the line.
        assert_not_flagged("**Important Note:**\n\nSome text");
    }
}