rumdl_lib/rules/
md043_required_headings.rs

1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use crate::rule_config_serde::RuleConfig;
3use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
4use crate::utils::range_utils::calculate_heading_range;
5use lazy_static::lazy_static;
6use regex::Regex;
7use serde::{Deserialize, Serialize};
8
9lazy_static! {
10    // Pattern for ATX headings
11    static ref ATX_HEADING: Regex = Regex::new(r"^(#+)\s+(.+)$").unwrap();
12    // Pattern for setext heading underlines
13    static ref SETEXT_UNDERLINE: Regex = Regex::new(r"^([=-]+)$").unwrap();
14}
15
16/// Configuration for MD043 rule
17#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
18#[serde(rename_all = "kebab-case")]
19pub struct MD043Config {
20    /// Required heading patterns
21    #[serde(default = "default_headings")]
22    pub headings: Vec<String>,
23    /// Case-sensitive matching (default: false)
24    #[serde(default = "default_match_case")]
25    pub match_case: bool,
26}
27
28impl Default for MD043Config {
29    fn default() -> Self {
30        Self {
31            headings: default_headings(),
32            match_case: default_match_case(),
33        }
34    }
35}
36
/// Default for `MD043Config::headings`: no required headings.
fn default_headings() -> Vec<String> {
    vec![]
}
40
/// Default for `MD043Config::match_case`: heading comparison ignores case.
fn default_match_case() -> bool {
    false
}
44
45impl RuleConfig for MD043Config {
46    const RULE_NAME: &'static str = "MD043";
47}
48
49/// Rule MD043: Required headings present
50///
51/// See [docs/md043.md](../../docs/md043.md) for full documentation, configuration, and examples.
52#[derive(Clone, Default)]
53pub struct MD043RequiredHeadings {
54    config: MD043Config,
55}
56
57impl MD043RequiredHeadings {
58    pub fn new(headings: Vec<String>) -> Self {
59        Self {
60            config: MD043Config {
61                headings,
62                match_case: default_match_case(),
63            },
64        }
65    }
66
67    /// Create a new instance with the given configuration
68    pub fn from_config_struct(config: MD043Config) -> Self {
69        Self { config }
70    }
71
72    /// Compare two headings based on the match_case configuration
73    fn headings_match(&self, expected: &str, actual: &str) -> bool {
74        if self.config.match_case {
75            expected == actual
76        } else {
77            expected.to_lowercase() == actual.to_lowercase()
78        }
79    }
80
81    fn extract_headings(&self, ctx: &crate::lint_context::LintContext) -> Vec<String> {
82        let mut result = Vec::new();
83
84        for line_info in &ctx.lines {
85            if let Some(heading) = &line_info.heading {
86                // Reconstruct the full heading format with the hash symbols
87                let full_heading = format!("{} {}", heading.marker, heading.text.trim());
88                result.push(full_heading);
89            }
90        }
91
92        result
93    }
94
95    fn is_heading(&self, line_index: usize, ctx: &crate::lint_context::LintContext) -> bool {
96        if line_index < ctx.lines.len() {
97            ctx.lines[line_index].heading.is_some()
98        } else {
99            false
100        }
101    }
102}
103
104impl Rule for MD043RequiredHeadings {
105    fn name(&self) -> &'static str {
106        "MD043"
107    }
108
109    fn description(&self) -> &'static str {
110        "Required heading structure"
111    }
112
113    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
114        let mut warnings = Vec::new();
115        let actual_headings = self.extract_headings(ctx);
116
117        // If no required headings are specified, the rule is disabled
118        if self.config.headings.is_empty() {
119            return Ok(warnings);
120        }
121
122        // Check if headings match based on case sensitivity configuration
123        let headings_match = if actual_headings.len() != self.config.headings.len() {
124            false
125        } else {
126            actual_headings
127                .iter()
128                .zip(self.config.headings.iter())
129                .all(|(actual, expected)| self.headings_match(expected, actual))
130        };
131
132        if !headings_match {
133            // If no headings found but we have required headings, create a warning
134            if actual_headings.is_empty() && !self.config.headings.is_empty() {
135                warnings.push(LintWarning {
136                    rule_name: Some(self.name()),
137                    line: 1,
138                    column: 1,
139                    end_line: 1,
140                    end_column: 2,
141                    message: format!("Required headings not found: {:?}", self.config.headings),
142                    severity: Severity::Warning,
143                    fix: None, // No automatic fix to prevent destructive changes
144                });
145                return Ok(warnings);
146            }
147
148            // Create warnings for each heading that doesn't match
149            for (i, line_info) in ctx.lines.iter().enumerate() {
150                if self.is_heading(i, ctx) {
151                    // Calculate precise character range for the entire heading
152                    let (start_line, start_col, end_line, end_col) = calculate_heading_range(i + 1, &line_info.content);
153
154                    warnings.push(LintWarning {
155                        rule_name: Some(self.name()),
156                        line: start_line,
157                        column: start_col,
158                        end_line,
159                        end_column: end_col,
160                        message: "Heading structure does not match the required structure".to_string(),
161                        severity: Severity::Warning,
162                        fix: None, // Cannot automatically fix as we don't know the intended structure
163                    });
164                }
165            }
166
167            // If we have no warnings but headings don't match (could happen if we have no headings),
168            // add a warning at the beginning of the file
169            if warnings.is_empty() {
170                warnings.push(LintWarning {
171                    rule_name: Some(self.name()),
172                    line: 1,
173                    column: 1,
174                    end_line: 1,
175                    end_column: 2,
176                    message: format!(
177                        "Heading structure does not match required structure. Expected: {:?}, Found: {:?}",
178                        self.config.headings, actual_headings
179                    ),
180                    severity: Severity::Warning,
181                    fix: None, // No automatic fix to prevent destructive changes
182                });
183            }
184        }
185
186        Ok(warnings)
187    }
188
189    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
190        let content = ctx.content;
191        // If no required headings are specified, return content as is
192        if self.config.headings.is_empty() {
193            return Ok(content.to_string());
194        }
195
196        let actual_headings = self.extract_headings(ctx);
197
198        // Check if headings already match - if so, no fix needed
199        if actual_headings.len() == self.config.headings.len()
200            && actual_headings
201                .iter()
202                .zip(self.config.headings.iter())
203                .all(|(actual, expected)| self.headings_match(expected, actual))
204        {
205            return Ok(content.to_string());
206        }
207
208        // IMPORTANT: MD043 fixes are inherently risky as they require restructuring the document.
209        // Instead of making destructive changes, we should be conservative and only make
210        // minimal changes when we're confident about the user's intent.
211
212        // For now, we'll avoid making destructive fixes and preserve the original content.
213        // This prevents data loss while still allowing the rule to identify issues.
214
215        // TODO: In the future, this could be enhanced to:
216        // 1. Insert missing required headings at appropriate positions
217        // 2. Rename existing headings to match requirements (when structure is similar)
218        // 3. Provide more granular fixes based on the specific mismatch
219
220        // Return original content unchanged to prevent data loss
221        Ok(content.to_string())
222    }
223
224    /// Optimized check using document structure
225    fn check_with_structure(
226        &self,
227        _ctx: &crate::lint_context::LintContext,
228        _structure: &DocumentStructure,
229    ) -> LintResult {
230        // Just use the regular check method which now uses cached headings
231        self.check(_ctx)
232    }
233
234    /// Get the category of this rule for selective processing
235    fn category(&self) -> RuleCategory {
236        RuleCategory::Heading
237    }
238
239    /// Check if this rule should be skipped
240    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
241        // Skip if no heading requirements or content is empty
242        if self.config.headings.is_empty() || ctx.content.is_empty() {
243            return true;
244        }
245
246        // Check if any heading exists using cached information
247        let has_heading = ctx.lines.iter().any(|line| line.heading.is_some());
248
249        !has_heading
250    }
251
252    fn as_any(&self) -> &dyn std::any::Any {
253        self
254    }
255
256    fn default_config_section(&self) -> Option<(String, toml::Value)> {
257        let default_config = MD043Config::default();
258        let json_value = serde_json::to_value(&default_config).ok()?;
259        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
260        if let toml::Value::Table(table) = toml_value {
261            if !table.is_empty() {
262                Some((MD043Config::RULE_NAME.to_string(), toml::Value::Table(table)))
263            } else {
264                None
265            }
266        } else {
267            None
268        }
269    }
270
271    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
272    where
273        Self: Sized,
274    {
275        let rule_config = crate::rule_config_serde::load_rule_config::<MD043Config>(config);
276        Box::new(MD043RequiredHeadings::from_config_struct(rule_config))
277    }
278}
279
280impl DocumentStructureExtensions for MD043RequiredHeadings {
281    fn has_relevant_elements(
282        &self,
283        _ctx: &crate::lint_context::LintContext,
284        doc_structure: &DocumentStructure,
285    ) -> bool {
286        !doc_structure.heading_lines.is_empty() || !self.config.headings.is_empty()
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;
    use crate::lint_context::LintContext;
    use crate::utils::document_structure::document_structure_from_str;

    /// Turn string literals into the owned list the rule configuration expects.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Rule built from an explicit configuration.
    fn rule_with(headings: &[&str], match_case: bool) -> MD043RequiredHeadings {
        MD043RequiredHeadings::from_config_struct(MD043Config {
            headings: owned(headings),
            match_case,
        })
    }

    /// Number of warnings `check` reports for `content` (standard flavor).
    fn warning_count(rule: &MD043RequiredHeadings, content: &str) -> usize {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        rule.check(&ctx).unwrap().len()
    }

    /// Number of warnings `check_with_structure` reports for `content`.
    fn structured_warning_count(rule: &MD043RequiredHeadings, content: &str) -> usize {
        let structure = document_structure_from_str(content);
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        rule.check_with_structure(&ctx, &structure).unwrap().len()
    }

    /// Result of `should_skip` for `content` (standard flavor).
    fn skips(rule: &MD043RequiredHeadings, content: &str) -> bool {
        rule.should_skip(&LintContext::new(content, MarkdownFlavor::Standard))
    }

    /// Headings the rule extracts from `content` (standard flavor).
    fn headings_of(rule: &MD043RequiredHeadings, content: &str) -> Vec<String> {
        rule.extract_headings(&LintContext::new(content, MarkdownFlavor::Standard))
    }

    #[test]
    fn test_extract_headings_code_blocks() {
        // Required headings are written with their hash markers
        let rule = MD043RequiredHeadings::new(owned(&["# Test Document", "## Real heading 2"]));

        // Case 1: heading-like lines inside a fenced code block are not headings
        let content = "# Test Document\n\nThis is regular content.\n\n```markdown\n# This is a heading in a code block\n## Another heading in code block\n```\n\n## Real heading 2\n\nSome content.";
        assert_eq!(
            headings_of(&rule, content),
            owned(&["# Test Document", "## Real heading 2"]),
            "Should extract correct headings and ignore code blocks"
        );

        // Case 2: real headings are extracted even when they differ from the requirement
        let content = "# Test Document\n\nThis is regular content.\n\n```markdown\n# This is a heading in a code block\n## This should be ignored\n```\n\n## Not Real heading 2\n\nSome content.";
        assert_eq!(
            headings_of(&rule, content),
            owned(&["# Test Document", "## Not Real heading 2"]),
            "Should extract actual headings including mismatched ones"
        );
    }

    #[test]
    fn test_with_document_structure() {
        // Required headings are written with their hash markers
        let rule = MD043RequiredHeadings::new(owned(&["# Introduction", "# Method", "# Results"]));

        // Matching headings
        let content = "# Introduction\n\nContent\n\n# Method\n\nMore content\n\n# Results\n\nFinal content";
        assert!(
            structured_warning_count(&rule, content) == 0,
            "Expected no warnings for matching headings"
        );

        // Mismatched headings
        let content = "# Introduction\n\nContent\n\n# Results\n\nSkipped method";
        assert!(
            structured_warning_count(&rule, content) != 0,
            "Expected warnings for mismatched headings"
        );

        // No headings at all while requirements exist
        let content = "No headings here, just plain text";
        assert!(
            structured_warning_count(&rule, content) != 0,
            "Expected warnings when headings are missing"
        );

        // Setext headings are expressed as "<underline marker> <text>"
        let rule_setext = MD043RequiredHeadings::new(owned(&[
            "=========== Introduction",
            "------ Method",
            "======= Results",
        ]));
        let content = "Introduction\n===========\n\nContent\n\nMethod\n------\n\nMore content\n\nResults\n=======\n\nFinal content";
        assert!(
            structured_warning_count(&rule_setext, content) == 0,
            "Expected no warnings for matching setext headings"
        );
    }

    #[test]
    fn test_should_skip_no_false_positives() {
        let rule = MD043RequiredHeadings::new(owned(&["Test"]));

        // Each entry is content without any real heading, paired with its failure message.
        let cases = [
            // '#' character in normal text (not a heading)
            (
                "This paragraph contains a # character but is not a heading",
                "Should skip content with # in normal text",
            ),
            // Code block containing heading-like syntax
            (
                "Regular paragraph\n\n```markdown\n# This is not a real heading\n```\n\nMore text",
                "Should skip content with heading-like syntax in code blocks",
            ),
            // List items using '-' character
            (
                "Some text\n\n- List item 1\n- List item 2\n\nMore text",
                "Should skip content with list items using dash",
            ),
            // Horizontal rule that uses '---'
            (
                "Some text\n\n---\n\nMore text below the horizontal rule",
                "Should skip content with horizontal rule",
            ),
            // Equals sign in normal text
            (
                "This is a normal paragraph with equals sign x = y + z",
                "Should skip content with equals sign in normal text",
            ),
            // Dash/minus in normal text
            (
                "This is a normal paragraph with minus sign x - y = z",
                "Should skip content with minus sign in normal text",
            ),
        ];

        for (content, message) in cases.iter() {
            assert!(skips(&rule, content), "{}", message);
        }
    }

    #[test]
    fn test_should_skip_heading_detection() {
        let rule = MD043RequiredHeadings::new(owned(&["Test"]));

        // Each entry is content with a real heading, paired with its failure message.
        let cases = [
            // ATX heading
            (
                "# This is a heading\n\nAnd some content",
                "Should not skip content with ATX heading",
            ),
            // Setext heading (equals sign)
            (
                "This is a heading\n================\n\nAnd some content",
                "Should not skip content with Setext heading (=)",
            ),
            // Setext heading (dash)
            (
                "This is a subheading\n------------------\n\nAnd some content",
                "Should not skip content with Setext heading (-)",
            ),
            // ATX heading with closing hashes
            (
                "## This is a heading ##\n\nAnd some content",
                "Should not skip content with ATX heading with closing hashes",
            ),
        ];

        for (content, message) in cases.iter() {
            assert!(!skips(&rule, content), "{}", message);
        }
    }

    #[test]
    fn test_config_match_case_sensitive() {
        let rule = rule_with(&["# Introduction", "# Method"], true);

        // Different case must be reported
        assert!(
            warning_count(&rule, "# introduction\n\n# method") != 0,
            "Should detect case mismatch when match_case is true"
        );
    }

    #[test]
    fn test_config_match_case_insensitive() {
        let rule = rule_with(&["# Introduction", "# Method"], false);

        // Different case is accepted
        assert!(
            warning_count(&rule, "# introduction\n\n# method") == 0,
            "Should allow case mismatch when match_case is false"
        );
    }

    #[test]
    fn test_config_case_insensitive_mixed() {
        let rule = rule_with(&["# Introduction", "# METHOD"], false);

        // Mixed case variations are accepted
        assert!(
            warning_count(&rule, "# INTRODUCTION\n\n# method") == 0,
            "Should allow mixed case variations when match_case is false"
        );
    }

    #[test]
    fn test_config_case_sensitive_exact_match() {
        let rule = rule_with(&["# Introduction", "# Method"], true);

        // Exact case match passes
        assert!(
            warning_count(&rule, "# Introduction\n\n# Method") == 0,
            "Should pass with exact case match when match_case is true"
        );
    }

    #[test]
    fn test_default_config() {
        let rule = MD043RequiredHeadings::default();

        // Empty requirement list disables the rule
        assert!(
            warning_count(&rule, "# Any heading\n\n# Another heading") == 0,
            "Should be disabled with default empty headings"
        );
    }

    #[test]
    fn test_default_config_section() {
        let rule = MD043RequiredHeadings::default();
        let config_section = rule.default_config_section();

        assert!(config_section.is_some());
        let (name, value) = config_section.unwrap();
        assert_eq!(name, "MD043");

        // Both options must be present with their default values
        match value {
            toml::Value::Table(table) => {
                assert!(table.contains_key("headings"));
                assert!(table.contains_key("match-case"));
                assert_eq!(table["headings"], toml::Value::Array(vec![]));
                assert_eq!(table["match-case"], toml::Value::Boolean(false));
            }
            _ => panic!("Expected TOML table"),
        }
    }

    #[test]
    fn test_headings_match_case_sensitive() {
        let rule = rule_with(&[], true);

        assert!(rule.headings_match("Test", "Test"));
        assert!(!rule.headings_match("Test", "test"));
        assert!(!rule.headings_match("test", "Test"));
    }

    #[test]
    fn test_headings_match_case_insensitive() {
        let rule = rule_with(&[], false);

        assert!(rule.headings_match("Test", "Test"));
        assert!(rule.headings_match("Test", "test"));
        assert!(rule.headings_match("test", "Test"));
        assert!(rule.headings_match("TEST", "test"));
    }

    #[test]
    fn test_config_empty_headings() {
        let rule = rule_with(&[], true);

        // No processing happens when no headings are required
        assert!(
            warning_count(&rule, "# Any heading\n\n# Another heading") == 0,
            "Should be disabled with empty headings list"
        );
    }

    #[test]
    fn test_fix_respects_configuration() {
        let rule = rule_with(&["# Title", "# Content"], false);

        let ctx = LintContext::new("Wrong content", MarkdownFlavor::Standard);
        let fixed = rule.fix(&ctx).unwrap();

        // MD043 preserves the original content to prevent data loss
        assert_eq!(fixed, "Wrong content");
    }
}