Skip to main content

rumdl_lib/rules/
md073_toc_validation.rs

1//! MD073: Table of Contents validation rule
2//!
3//! Validates that TOC sections match the actual document headings.
4
5use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
12/// Regex for TOC start marker: `<!-- toc -->` with optional whitespace variations
13static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());
14
15/// Regex for TOC stop marker: `<!-- tocstop -->` or `<!-- /toc -->`
16static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());
17
18/// Regex for extracting TOC entries: `- [text](#anchor)` or `* [text](#anchor)`
19/// with optional leading whitespace for nested items
20/// Handles nested brackets like `[`check [PATHS...]`](#check-paths)`
21static TOC_ENTRY_PATTERN: LazyLock<Regex> =
22    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// Location of the TOC content found between the start and stop markers.
///
/// Line numbers are 1-indexed; byte offsets index into the document text and
/// form the half-open range `content_start..content_end`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TocRegion {
    /// 1-indexed line on which the TOC content starts (the line after the start marker)
    start_line: usize,
    /// 1-indexed line on which the TOC content ends (the line before the stop marker)
    end_line: usize,
    /// Byte offset where the TOC content starts
    content_start: usize,
    /// Byte offset where the TOC content ends (exclusive)
    content_end: usize,
}
36
/// A list item parsed out of the TOC that is currently in the document.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TocEntry {
    /// Display text of the link
    text: String,
    /// Anchor/fragment the link points at (without the leading `#`)
    anchor: String,
    /// Length in bytes of the whitespace in front of the list marker
    /// (used for indentation checking)
    indent_spaces: usize,
}
47
/// A TOC entry the document *should* contain, derived from one of its headings.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExpectedTocEntry {
    /// 1-indexed line number of the heading
    heading_line: usize,
    /// Heading level (1-6)
    level: u8,
    /// Heading text as written in the document (for display)
    text: String,
    /// Anchor generated for the heading (without the leading `#`)
    anchor: String,
}
60
61/// Types of mismatches between actual and expected TOC
62#[derive(Debug)]
63enum TocMismatch {
64    /// Entry exists in TOC but heading doesn't exist
65    StaleEntry { entry: TocEntry },
66    /// Heading exists but no TOC entry for it
67    MissingEntry { expected: ExpectedTocEntry },
68    /// TOC entry text doesn't match heading text
69    TextMismatch {
70        entry: TocEntry,
71        expected: ExpectedTocEntry,
72    },
73    /// TOC entries are in wrong order
74    OrderMismatch { entry: TocEntry, expected_position: usize },
75    /// TOC entry has wrong indentation level
76    IndentationMismatch {
77        entry: TocEntry,
78        actual_indent: usize,
79        expected_indent: usize,
80    },
81}
82
83/// Regex patterns for stripping markdown formatting from heading text
84static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
85static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
86static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
/// Strip code spans from text, handling multi-backtick spans per the CommonMark spec.
///
/// A code span opens with a run of N backticks and closes at the next run of
/// exactly N backticks; the delimiters are dropped and the content is kept.
/// E.g. `` `code` `` becomes `code`, and ``` ``code with ` backtick`` ```
/// becomes ``code with ` backtick``.
///
/// Details that follow the spec:
/// - If the content both starts and ends with a space and is not made up of
///   spaces only, one space is removed from each side.
/// - A backtick run with no matching closing run is literal text. Scanning
///   resumes right after it, so code spans later in the text are still stripped.
fn strip_code_spans(text: &str) -> String {
    let chars: Vec<char> = text.chars().collect();
    let len = chars.len();
    let mut result = String::with_capacity(text.len());
    let mut i = 0;

    while i < len {
        if chars[i] != '`' {
            result.push(chars[i]);
            i += 1;
            continue;
        }

        // Measure the opening backtick run.
        let open_start = i;
        while i < len && chars[i] == '`' {
            i += 1;
        }
        let backtick_count = i - open_start;
        let content_start = i;

        // Look ahead for a closing run of exactly the same length.
        let mut close_start = None;
        let mut j = content_start;
        while j < len {
            if chars[j] != '`' {
                j += 1;
                continue;
            }
            let run_start = j;
            while j < len && chars[j] == '`' {
                j += 1;
            }
            if j - run_start == backtick_count {
                close_start = Some(run_start);
                break;
            }
        }

        match close_start {
            Some(close) => {
                let content = &chars[content_start..close];
                // Padding is only removed when there is something other than spaces inside.
                let is_padded = content.len() >= 2
                    && content[0] == ' '
                    && content[content.len() - 1] == ' '
                    && content.iter().any(|&c| c != ' ');
                let inner = if is_padded {
                    &content[1..content.len() - 1]
                } else {
                    content
                };
                result.extend(inner.iter());
                i = close + backtick_count;
            }
            None => {
                // Unmatched opener: keep the backticks as literal text and
                // continue scanning right after them.
                result.extend(std::iter::repeat('`').take(backtick_count));
                i = content_start;
            }
        }
    }

    result
}
147static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
148static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
149static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
150// Match underscore italic at word boundaries (space or start/end)
151// Handles: "_text_", " _text_ ", "start _text_", "_text_ end"
152static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> =
153    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
154
155/// Strip markdown formatting from text, preserving plain text content.
156/// Used for TOC entry display text.
157///
158/// Examples:
159/// - `[terminal](url)` → `terminal`
160/// - `**bold**` → `bold`
161/// - `` `code` `` → `code`
162/// - `Tool: [terminal](url)` → `Tool: terminal`
163fn strip_markdown_formatting(text: &str) -> String {
164    let mut result = text.to_string();
165
166    // Strip images first (before links, since images use similar syntax)
167    result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
168
169    // Strip links: [text](url) → text
170    result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
171
172    // Strip reference links: [text][ref] → text
173    result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
174
175    // Strip code spans (handles multi-backtick spans like ``code with ` backtick``)
176    result = strip_code_spans(&result);
177
178    // Strip bold (do double before single to handle nested)
179    result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
180    result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
181
182    // Strip italic
183    result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
184    // Underscore italic: preserve boundary chars, extract content
185    result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
186
187    result
188}
189
/// MD073: Table of Contents Validation
///
/// This rule validates that TOC sections match the actual document headings.
/// It detects TOC regions via markers (`<!-- toc -->...<!-- tocstop -->`).
///
/// To opt into TOC validation, add markers to your document:
/// ```markdown
/// <!-- toc -->
/// - [Section](#section)
/// <!-- tocstop -->
/// ```
///
/// ## Configuration
///
/// ```toml
/// [MD073]
/// # Enable the rule (opt-in, disabled by default)
/// enabled = true
/// # Minimum heading level to include (default: 2)
/// min-level = 2
/// # Maximum heading level to include (default: 4)
/// max-level = 4
/// # Whether TOC order must match document order (default: true)
/// enforce-order = true
/// # Indent size per nesting level (default: from MD007 config, or 2)
/// indent = 2
/// ```
#[derive(Clone)]
pub struct MD073TocValidation {
    /// Whether this rule is enabled (default: false - opt-in rule)
    enabled: bool,
    /// Lowest heading level that gets a TOC entry (headings above it are ignored)
    min_level: u8,
    /// Deepest heading level that gets a TOC entry (headings below it are ignored)
    max_level: u8,
    /// Whether TOC entries must appear in the same order as the headings
    enforce_order: bool,
    /// Number of spaces per nesting level (taken from the MD007 config unless
    /// MD073 sets its own `indent`)
    pub indent: usize,
}
230
231impl Default for MD073TocValidation {
232    fn default() -> Self {
233        Self {
234            enabled: false, // Disabled by default - opt-in rule
235            min_level: 2,
236            max_level: 4,
237            enforce_order: true,
238            indent: 2, // Default indent, can be overridden by MD007 config
239        }
240    }
241}
242
243impl std::fmt::Debug for MD073TocValidation {
244    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
245        f.debug_struct("MD073TocValidation")
246            .field("enabled", &self.enabled)
247            .field("min_level", &self.min_level)
248            .field("max_level", &self.max_level)
249            .field("enforce_order", &self.enforce_order)
250            .field("indent", &self.indent)
251            .finish()
252    }
253}
254
255impl MD073TocValidation {
256    /// Create a new rule with default settings
257    pub fn new() -> Self {
258        Self::default()
259    }
260
261    /// Detect TOC region using markers
262    fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
263        let mut start_line = None;
264        let mut start_byte = None;
265
266        for (idx, line_info) in ctx.lines.iter().enumerate() {
267            let line_num = idx + 1;
268            let content = line_info.content(ctx.content);
269
270            // Skip if in code block or front matter
271            if line_info.in_code_block || line_info.in_front_matter {
272                continue;
273            }
274
275            // Look for start marker or stop marker
276            if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
277                // We have a start, now look for stop marker
278                if TOC_STOP_MARKER.is_match(content) {
279                    let end_line = line_num - 1;
280                    let content_end = line_info.byte_offset;
281
282                    // Handle case where there's no content between markers
283                    if end_line < s_line {
284                        return Some(TocRegion {
285                            start_line: s_line,
286                            end_line: s_line,
287                            content_start: s_byte,
288                            content_end: s_byte,
289                        });
290                    }
291
292                    return Some(TocRegion {
293                        start_line: s_line,
294                        end_line,
295                        content_start: s_byte,
296                        content_end,
297                    });
298                }
299            } else if TOC_START_MARKER.is_match(content) {
300                // TOC content starts on the next line
301                if idx + 1 < ctx.lines.len() {
302                    start_line = Some(line_num + 1);
303                    start_byte = Some(ctx.lines[idx + 1].byte_offset);
304                }
305            }
306        }
307
308        None
309    }
310
311    /// Detect TOC region using markers
312    fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
313        self.detect_by_markers(ctx)
314    }
315
316    /// Extract TOC entries from the detected region
317    fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
318        let mut entries = Vec::new();
319
320        for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
321            let line_info = &ctx.lines[idx];
322            let content = line_info.content(ctx.content);
323
324            if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
325                let indent_spaces = caps.get(1).map_or(0, |m| m.as_str().len());
326                let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
327                let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
328
329                entries.push(TocEntry {
330                    text,
331                    anchor,
332                    indent_spaces,
333                });
334            }
335        }
336
337        entries
338    }
339
340    /// Build expected TOC entries from document headings
341    fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
342        let mut entries = Vec::new();
343        let mut fragment_counts: HashMap<String, usize> = HashMap::new();
344
345        for (idx, line_info) in ctx.lines.iter().enumerate() {
346            let line_num = idx + 1;
347
348            // Skip headings before/within the TOC region
349            if line_num <= toc_region.end_line {
350                // Also skip the TOC heading itself for heading-based detection
351                continue;
352            }
353
354            // Skip code blocks, front matter, HTML blocks
355            if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
356                continue;
357            }
358
359            if let Some(heading) = &line_info.heading {
360                // Filter by min/max level
361                if heading.level < self.min_level || heading.level > self.max_level {
362                    continue;
363                }
364
365                // Use custom ID if available, otherwise generate GitHub-style anchor
366                let base_anchor = if let Some(custom_id) = &heading.custom_id {
367                    custom_id.clone()
368                } else {
369                    AnchorStyle::GitHub.generate_fragment(&heading.text)
370                };
371
372                // Handle duplicate anchors
373                let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
374                    let suffix = *count;
375                    *count += 1;
376                    format!("{base_anchor}-{suffix}")
377                } else {
378                    fragment_counts.insert(base_anchor.clone(), 1);
379                    base_anchor
380                };
381
382                entries.push(ExpectedTocEntry {
383                    heading_line: line_num,
384                    level: heading.level,
385                    text: heading.text.clone(),
386                    anchor,
387                });
388            }
389        }
390
391        entries
392    }
393
394    /// Compare actual TOC entries against expected and find mismatches
395    fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
396        let mut mismatches = Vec::new();
397
398        // Build a map of expected anchors
399        let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
400            expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
401
402        // Count actual anchors (handles duplicate anchors in TOC)
403        let mut actual_anchor_counts: HashMap<&str, usize> = HashMap::new();
404        for entry in actual {
405            *actual_anchor_counts.entry(entry.anchor.as_str()).or_insert(0) += 1;
406        }
407
408        // Count expected anchors
409        let mut expected_anchor_counts: HashMap<&str, usize> = HashMap::new();
410        for exp in expected {
411            *expected_anchor_counts.entry(exp.anchor.as_str()).or_insert(0) += 1;
412        }
413
414        // Check for stale entries (in TOC but not in expected, accounting for counts)
415        let mut stale_anchor_counts: HashMap<&str, usize> = HashMap::new();
416        for entry in actual {
417            let actual_count = actual_anchor_counts.get(entry.anchor.as_str()).copied().unwrap_or(0);
418            let expected_count = expected_anchor_counts.get(entry.anchor.as_str()).copied().unwrap_or(0);
419            if actual_count > expected_count {
420                let reported = stale_anchor_counts.entry(entry.anchor.as_str()).or_insert(0);
421                if *reported < actual_count - expected_count {
422                    *reported += 1;
423                    mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
424                }
425            }
426        }
427
428        // Check for missing entries (in expected but not in TOC, accounting for counts)
429        let mut missing_anchor_counts: HashMap<&str, usize> = HashMap::new();
430        for exp in expected {
431            let actual_count = actual_anchor_counts.get(exp.anchor.as_str()).copied().unwrap_or(0);
432            let expected_count = expected_anchor_counts.get(exp.anchor.as_str()).copied().unwrap_or(0);
433            if expected_count > actual_count {
434                let reported = missing_anchor_counts.entry(exp.anchor.as_str()).or_insert(0);
435                if *reported < expected_count - actual_count {
436                    *reported += 1;
437                    mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
438                }
439            }
440        }
441
442        // Check for text mismatches (compare stripped versions)
443        for entry in actual {
444            if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
445                // Compare stripped text (removes markdown formatting like links, emphasis)
446                let actual_stripped = strip_markdown_formatting(entry.text.trim());
447                let expected_stripped = strip_markdown_formatting(exp.text.trim());
448                if actual_stripped != expected_stripped {
449                    mismatches.push(TocMismatch::TextMismatch {
450                        entry: entry.clone(),
451                        expected: (*exp).clone(),
452                    });
453                }
454            }
455        }
456
457        // Check for indentation mismatches
458        // Expected indentation is indent spaces per level difference from base level
459        if !expected.is_empty() {
460            let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
461
462            for entry in actual {
463                if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
464                    let level_diff = exp.level.saturating_sub(base_level) as usize;
465                    let expected_indent = level_diff * self.indent;
466
467                    if entry.indent_spaces != expected_indent {
468                        // Don't report indentation mismatch if already reported as text mismatch
469                        let already_reported = mismatches.iter().any(|m| match m {
470                            TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
471                            TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
472                            _ => false,
473                        });
474                        if !already_reported {
475                            mismatches.push(TocMismatch::IndentationMismatch {
476                                entry: entry.clone(),
477                                actual_indent: entry.indent_spaces,
478                                expected_indent,
479                            });
480                        }
481                    }
482                }
483            }
484        }
485
486        // Check order if enforce_order is enabled
487        if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
488            let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
489
490            // Find entries that exist in both but are out of order
491            let mut expected_idx = 0;
492            for entry in actual {
493                // Skip entries that don't exist in expected
494                if !expected_anchors.contains_key(entry.anchor.as_str()) {
495                    continue;
496                }
497
498                // Find where this anchor should be
499                while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
500                    expected_idx += 1;
501                }
502
503                if expected_idx >= expected_order.len() {
504                    // This entry is after where it should be
505                    let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
506                    // Only add order mismatch if not already reported as stale/text mismatch
507                    let already_reported = mismatches.iter().any(|m| match m {
508                        TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
509                        TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
510                        _ => false,
511                    });
512                    if !already_reported {
513                        mismatches.push(TocMismatch::OrderMismatch {
514                            entry: entry.clone(),
515                            expected_position: correct_pos + 1,
516                        });
517                    }
518                } else {
519                    expected_idx += 1;
520                }
521            }
522        }
523
524        mismatches
525    }
526
527    /// Generate a new TOC from expected entries (always uses nested indentation)
528    fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
529        if expected.is_empty() {
530            return String::new();
531        }
532
533        let mut result = String::new();
534        let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
535        let indent_str = " ".repeat(self.indent);
536
537        for entry in expected {
538            let level_diff = entry.level.saturating_sub(base_level) as usize;
539            let indent = indent_str.repeat(level_diff);
540
541            // Strip markdown formatting from heading text for clean TOC entries
542            let display_text = strip_markdown_formatting(&entry.text);
543            result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
544        }
545
546        result
547    }
548}
549
550impl Rule for MD073TocValidation {
551    fn name(&self) -> &'static str {
552        "MD073"
553    }
554
555    fn description(&self) -> &'static str {
556        "Table of Contents should match document headings"
557    }
558
559    fn should_skip(&self, ctx: &LintContext) -> bool {
560        // Quick check: skip if no TOC markers. detect_toc_region() is
561        // case-insensitive, so use a case-insensitive containment check here
562        // to avoid skipping fix() on documents with uppercase markers like
563        // `<!-- TOC -->`.
564        let lower = ctx.content.to_ascii_lowercase();
565        !(lower.contains("<!-- toc") || lower.contains("<!--toc"))
566    }
567
568    fn check(&self, ctx: &LintContext) -> LintResult {
569        let mut warnings = Vec::new();
570
571        // Detect TOC region
572        let Some(region) = self.detect_toc_region(ctx) else {
573            // No TOC found - nothing to validate
574            return Ok(warnings);
575        };
576
577        // Extract actual TOC entries
578        let actual_entries = self.extract_toc_entries(ctx, &region);
579
580        // Build expected TOC from headings
581        let expected_entries = self.build_expected_toc(ctx, &region);
582
583        // If no expected entries and no actual entries, nothing to validate
584        if expected_entries.is_empty() && actual_entries.is_empty() {
585            return Ok(warnings);
586        }
587
588        // Validate
589        let mismatches = self.validate_toc(&actual_entries, &expected_entries);
590
591        if !mismatches.is_empty() {
592            // Generate a single warning at the TOC region with details
593            let mut details = Vec::new();
594
595            for mismatch in &mismatches {
596                match mismatch {
597                    TocMismatch::StaleEntry { entry } => {
598                        details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
599                    }
600                    TocMismatch::MissingEntry { expected } => {
601                        details.push(format!(
602                            "Missing entry: '{}' (line {})",
603                            expected.text, expected.heading_line
604                        ));
605                    }
606                    TocMismatch::TextMismatch { entry, expected } => {
607                        details.push(format!(
608                            "Text mismatch: TOC has '{}', heading is '{}'",
609                            entry.text, expected.text
610                        ));
611                    }
612                    TocMismatch::OrderMismatch {
613                        entry,
614                        expected_position,
615                    } => {
616                        details.push(format!(
617                            "Order mismatch: '{}' should be at position {}",
618                            entry.text, expected_position
619                        ));
620                    }
621                    TocMismatch::IndentationMismatch {
622                        entry,
623                        actual_indent,
624                        expected_indent,
625                        ..
626                    } => {
627                        details.push(format!(
628                            "Indentation mismatch: '{}' has {} spaces, expected {} spaces",
629                            entry.text, actual_indent, expected_indent
630                        ));
631                    }
632                }
633            }
634
635            let message = format!(
636                "Table of Contents does not match document headings: {}",
637                details.join("; ")
638            );
639
640            // Generate fix: replace entire TOC content
641            let new_toc = self.generate_toc(&expected_entries);
642            let fix_range = region.content_start..region.content_end;
643
644            warnings.push(LintWarning {
645                rule_name: Some(self.name().to_string()),
646                message,
647                line: region.start_line,
648                column: 1,
649                end_line: region.end_line,
650                end_column: 1,
651                severity: Severity::Warning,
652                fix: Some(Fix {
653                    range: fix_range,
654                    replacement: new_toc,
655                }),
656            });
657        }
658
659        Ok(warnings)
660    }
661
662    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
663        if self.should_skip(ctx) {
664            return Ok(ctx.content.to_string());
665        }
666        let warnings = self.check(ctx)?;
667        if warnings.is_empty() {
668            return Ok(ctx.content.to_string());
669        }
670        let warnings =
671            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
672        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
673    }
674
675    fn category(&self) -> RuleCategory {
676        RuleCategory::Other
677    }
678
679    fn as_any(&self) -> &dyn std::any::Any {
680        self
681    }
682
683    fn default_config_section(&self) -> Option<(String, toml::Value)> {
684        let value: toml::Value = toml::from_str(
685            r#"
686# Whether this rule is enabled (opt-in, disabled by default)
687enabled = false
688# Minimum heading level to include
689min-level = 2
690# Maximum heading level to include
691max-level = 4
692# Whether TOC order must match document order
693enforce-order = true
694# Indentation per nesting level (defaults to MD007's indent value)
695indent = 2
696"#,
697        )
698        .ok()?;
699        Some(("MD073".to_string(), value))
700    }
701
702    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
703    where
704        Self: Sized,
705    {
706        let mut rule = MD073TocValidation::default();
707        let mut indent_from_md073 = false;
708
709        if let Some(rule_config) = config.rules.get("MD073") {
710            // Parse enabled (opt-in rule, defaults to false)
711            if let Some(enabled) = rule_config.values.get("enabled").and_then(|v| v.as_bool()) {
712                rule.enabled = enabled;
713            }
714
715            // Parse min-level
716            if let Some(min_level) = rule_config.values.get("min-level").and_then(|v| v.as_integer()) {
717                rule.min_level = (min_level.clamp(1, 6)) as u8;
718            }
719
720            // Parse max-level
721            if let Some(max_level) = rule_config.values.get("max-level").and_then(|v| v.as_integer()) {
722                rule.max_level = (max_level.clamp(1, 6)) as u8;
723            }
724
725            // Parse enforce-order
726            if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(|v| v.as_bool()) {
727                rule.enforce_order = enforce_order;
728            }
729
730            // Parse indent (MD073-specific override)
731            if let Some(indent) = rule_config.values.get("indent").and_then(|v| v.as_integer()) {
732                rule.indent = (indent.clamp(1, 8)) as usize;
733                indent_from_md073 = true;
734            }
735        }
736
737        // If indent not explicitly set in MD073, read from MD007 config
738        if !indent_from_md073
739            && let Some(md007_config) = config.rules.get("MD007")
740            && let Some(indent) = md007_config.values.get("indent").and_then(|v| v.as_integer())
741        {
742            rule.indent = (indent.clamp(1, 8)) as usize;
743        }
744
745        Box::new(rule)
746    }
747}
748
749#[cfg(test)]
750mod tests {
751    use super::*;
752    use crate::config::MarkdownFlavor;
753
754    fn create_ctx(content: &str) -> LintContext<'_> {
755        LintContext::new(content, MarkdownFlavor::Standard, None)
756    }
757
758    /// Create rule with enabled=true for tests that call check() directly
759    fn create_enabled_rule() -> MD073TocValidation {
760        MD073TocValidation {
761            enabled: true,
762            ..MD073TocValidation::default()
763        }
764    }
765
766    // ========== Detection Tests ==========
767
768    #[test]
769    fn test_detect_markers_basic() {
770        let rule = MD073TocValidation::new();
771        let content = r#"# Title
772
773<!-- toc -->
774
775- [Heading 1](#heading-1)
776
777<!-- tocstop -->
778
779## Heading 1
780
781Content here.
782"#;
783        let ctx = create_ctx(content);
784        let region = rule.detect_by_markers(&ctx);
785        assert!(region.is_some());
786        let region = region.unwrap();
787        // Verify region boundaries are detected correctly
788        assert_eq!(region.start_line, 4);
789        assert_eq!(region.end_line, 6);
790    }
791
792    #[test]
793    fn test_detect_markers_variations() {
794        let rule = MD073TocValidation::new();
795
796        // Test <!--toc--> (no spaces)
797        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
798        let ctx1 = create_ctx(content1);
799        assert!(rule.detect_by_markers(&ctx1).is_some());
800
801        // Test <!-- TOC --> (uppercase)
802        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
803        let ctx2 = create_ctx(content2);
804        assert!(rule.detect_by_markers(&ctx2).is_some());
805
806        // Test <!-- /toc --> (alternative stop marker)
807        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
808        let ctx3 = create_ctx(content3);
809        assert!(rule.detect_by_markers(&ctx3).is_some());
810    }
811
812    #[test]
813    fn test_no_toc_region() {
814        let rule = MD073TocValidation::new();
815        let content = r#"# Title
816
817## Heading 1
818
819Content here.
820
821## Heading 2
822
823More content.
824"#;
825        let ctx = create_ctx(content);
826        let region = rule.detect_toc_region(&ctx);
827        assert!(region.is_none());
828    }
829
830    // ========== Validation Tests ==========
831
832    #[test]
833    fn test_toc_matches_headings() {
834        let rule = create_enabled_rule();
835        let content = r#"# Title
836
837<!-- toc -->
838
839- [Heading 1](#heading-1)
840- [Heading 2](#heading-2)
841
842<!-- tocstop -->
843
844## Heading 1
845
846Content.
847
848## Heading 2
849
850More content.
851"#;
852        let ctx = create_ctx(content);
853        let result = rule.check(&ctx).unwrap();
854        assert!(result.is_empty(), "Expected no warnings for matching TOC");
855    }
856
857    #[test]
858    fn test_missing_entry() {
859        let rule = create_enabled_rule();
860        let content = r#"# Title
861
862<!-- toc -->
863
864- [Heading 1](#heading-1)
865
866<!-- tocstop -->
867
868## Heading 1
869
870Content.
871
872## Heading 2
873
874New heading not in TOC.
875"#;
876        let ctx = create_ctx(content);
877        let result = rule.check(&ctx).unwrap();
878        assert_eq!(result.len(), 1);
879        assert!(result[0].message.contains("Missing entry"));
880        assert!(result[0].message.contains("Heading 2"));
881    }
882
883    #[test]
884    fn test_stale_entry() {
885        let rule = create_enabled_rule();
886        let content = r#"# Title
887
888<!-- toc -->
889
890- [Heading 1](#heading-1)
891- [Deleted Heading](#deleted-heading)
892
893<!-- tocstop -->
894
895## Heading 1
896
897Content.
898"#;
899        let ctx = create_ctx(content);
900        let result = rule.check(&ctx).unwrap();
901        assert_eq!(result.len(), 1);
902        assert!(result[0].message.contains("Stale entry"));
903        assert!(result[0].message.contains("Deleted Heading"));
904    }
905
906    #[test]
907    fn test_text_mismatch() {
908        let rule = create_enabled_rule();
909        let content = r#"# Title
910
911<!-- toc -->
912
913- [Old Name](#heading-1)
914
915<!-- tocstop -->
916
917## Heading 1
918
919Content.
920"#;
921        let ctx = create_ctx(content);
922        let result = rule.check(&ctx).unwrap();
923        assert_eq!(result.len(), 1);
924        assert!(result[0].message.contains("Text mismatch"));
925    }
926
927    // ========== Level Filtering Tests ==========
928
929    #[test]
930    fn test_min_level_excludes_h1() {
931        let mut rule = MD073TocValidation::new();
932        rule.min_level = 2;
933
934        let content = r#"<!-- toc -->
935
936<!-- tocstop -->
937
938# Should Be Excluded
939
940## Should Be Included
941
942Content.
943"#;
944        let ctx = create_ctx(content);
945        let region = rule.detect_toc_region(&ctx).unwrap();
946        let expected = rule.build_expected_toc(&ctx, &region);
947
948        assert_eq!(expected.len(), 1);
949        assert_eq!(expected[0].text, "Should Be Included");
950    }
951
952    #[test]
953    fn test_max_level_excludes_h5_h6() {
954        let mut rule = MD073TocValidation::new();
955        rule.max_level = 4;
956
957        let content = r#"<!-- toc -->
958
959<!-- tocstop -->
960
961## Level 2
962
963### Level 3
964
965#### Level 4
966
967##### Level 5 Should Be Excluded
968
969###### Level 6 Should Be Excluded
970"#;
971        let ctx = create_ctx(content);
972        let region = rule.detect_toc_region(&ctx).unwrap();
973        let expected = rule.build_expected_toc(&ctx, &region);
974
975        assert_eq!(expected.len(), 3);
976        assert!(expected.iter().all(|e| e.level <= 4));
977    }
978
979    // ========== Fix Tests ==========
980
981    #[test]
982    fn test_fix_adds_missing_entry() {
983        let rule = MD073TocValidation::new();
984        let content = r#"# Title
985
986<!-- toc -->
987
988- [Heading 1](#heading-1)
989
990<!-- tocstop -->
991
992## Heading 1
993
994Content.
995
996## Heading 2
997
998New heading.
999"#;
1000        let ctx = create_ctx(content);
1001        let fixed = rule.fix(&ctx).unwrap();
1002        assert!(fixed.contains("- [Heading 2](#heading-2)"));
1003    }
1004
1005    #[test]
1006    fn test_fix_removes_stale_entry() {
1007        let rule = MD073TocValidation::new();
1008        let content = r#"# Title
1009
1010<!-- toc -->
1011
1012- [Heading 1](#heading-1)
1013- [Deleted](#deleted)
1014
1015<!-- tocstop -->
1016
1017## Heading 1
1018
1019Content.
1020"#;
1021        let ctx = create_ctx(content);
1022        let fixed = rule.fix(&ctx).unwrap();
1023        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1024        assert!(!fixed.contains("Deleted"));
1025    }
1026
1027    #[test]
1028    fn test_fix_idempotent() {
1029        let rule = MD073TocValidation::new();
1030        let content = r#"# Title
1031
1032<!-- toc -->
1033
1034- [Heading 1](#heading-1)
1035- [Heading 2](#heading-2)
1036
1037<!-- tocstop -->
1038
1039## Heading 1
1040
1041Content.
1042
1043## Heading 2
1044
1045More.
1046"#;
1047        let ctx = create_ctx(content);
1048        let fixed1 = rule.fix(&ctx).unwrap();
1049        let ctx2 = create_ctx(&fixed1);
1050        let fixed2 = rule.fix(&ctx2).unwrap();
1051
1052        // Second fix should produce same output
1053        assert_eq!(fixed1, fixed2);
1054    }
1055
1056    #[test]
1057    fn test_fix_preserves_markers() {
1058        let rule = MD073TocValidation::new();
1059        let content = r#"# Title
1060
1061<!-- toc -->
1062
1063Old TOC content.
1064
1065<!-- tocstop -->
1066
1067## New Heading
1068
1069Content.
1070"#;
1071        let ctx = create_ctx(content);
1072        let fixed = rule.fix(&ctx).unwrap();
1073
1074        // Markers should still be present
1075        assert!(fixed.contains("<!-- toc -->"));
1076        assert!(fixed.contains("<!-- tocstop -->"));
1077        // New content should be generated
1078        assert!(fixed.contains("- [New Heading](#new-heading)"));
1079    }
1080
1081    #[test]
1082    fn test_fix_requires_markers() {
1083        let rule = create_enabled_rule();
1084
1085        // Document without markers - no TOC detected, no changes
1086        let content_no_markers = r#"# Title
1087
1088## Heading 1
1089
1090Content.
1091"#;
1092        let ctx = create_ctx(content_no_markers);
1093        let fixed = rule.fix(&ctx).unwrap();
1094        assert_eq!(fixed, content_no_markers);
1095
1096        // Document with markers - TOC detected and fixed
1097        let content_markers = r#"# Title
1098
1099<!-- toc -->
1100
1101- [Old Entry](#old-entry)
1102
1103<!-- tocstop -->
1104
1105## Heading 1
1106
1107Content.
1108"#;
1109        let ctx = create_ctx(content_markers);
1110        let fixed = rule.fix(&ctx).unwrap();
1111        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1112        assert!(!fixed.contains("Old Entry"));
1113    }
1114
1115    // ========== Anchor Tests ==========
1116
1117    #[test]
1118    fn test_duplicate_heading_anchors() {
1119        let rule = MD073TocValidation::new();
1120        let content = r#"# Title
1121
1122<!-- toc -->
1123
1124<!-- tocstop -->
1125
1126## Duplicate
1127
1128Content.
1129
1130## Duplicate
1131
1132More content.
1133
1134## Duplicate
1135
1136Even more.
1137"#;
1138        let ctx = create_ctx(content);
1139        let region = rule.detect_toc_region(&ctx).unwrap();
1140        let expected = rule.build_expected_toc(&ctx, &region);
1141
1142        assert_eq!(expected.len(), 3);
1143        assert_eq!(expected[0].anchor, "duplicate");
1144        assert_eq!(expected[1].anchor, "duplicate-1");
1145        assert_eq!(expected[2].anchor, "duplicate-2");
1146    }
1147
1148    // ========== Edge Cases ==========
1149
1150    #[test]
1151    fn test_headings_in_code_blocks_ignored() {
1152        let rule = create_enabled_rule();
1153        let content = r#"# Title
1154
1155<!-- toc -->
1156
1157- [Real Heading](#real-heading)
1158
1159<!-- tocstop -->
1160
1161## Real Heading
1162
1163```markdown
1164## Fake Heading In Code
1165```
1166
1167Content.
1168"#;
1169        let ctx = create_ctx(content);
1170        let result = rule.check(&ctx).unwrap();
1171        assert!(result.is_empty(), "Should not report fake heading in code block");
1172    }
1173
1174    #[test]
1175    fn test_empty_toc_region() {
1176        let rule = create_enabled_rule();
1177        let content = r#"# Title
1178
1179<!-- toc -->
1180<!-- tocstop -->
1181
1182## Heading 1
1183
1184Content.
1185"#;
1186        let ctx = create_ctx(content);
1187        let result = rule.check(&ctx).unwrap();
1188        assert_eq!(result.len(), 1);
1189        assert!(result[0].message.contains("Missing entry"));
1190    }
1191
1192    #[test]
1193    fn test_nested_indentation() {
1194        let rule = create_enabled_rule();
1195
1196        let content = r#"<!-- toc -->
1197
1198<!-- tocstop -->
1199
1200## Level 2
1201
1202### Level 3
1203
1204#### Level 4
1205
1206## Another Level 2
1207"#;
1208        let ctx = create_ctx(content);
1209        let region = rule.detect_toc_region(&ctx).unwrap();
1210        let expected = rule.build_expected_toc(&ctx, &region);
1211        let toc = rule.generate_toc(&expected);
1212
1213        // Check indentation (always nested)
1214        assert!(toc.contains("- [Level 2](#level-2)"));
1215        assert!(toc.contains("  - [Level 3](#level-3)"));
1216        assert!(toc.contains("    - [Level 4](#level-4)"));
1217        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1218    }
1219
1220    // ========== Indentation Mismatch Tests ==========
1221
1222    #[test]
1223    fn test_indentation_mismatch_detected() {
1224        let rule = create_enabled_rule();
1225        // TOC entries are all at same indentation level, but headings have different levels
1226        let content = r#"<!-- toc -->
1227- [Hello](#hello)
1228- [Another](#another)
1229- [Heading](#heading)
1230<!-- tocstop -->
1231
1232## Hello
1233
1234### Another
1235
1236## Heading
1237"#;
1238        let ctx = create_ctx(content);
1239        let result = rule.check(&ctx).unwrap();
1240        // Should detect indentation mismatch - "Another" is level 3 but has no indent
1241        assert_eq!(result.len(), 1, "Should report indentation mismatch: {result:?}");
1242        assert!(
1243            result[0].message.contains("Indentation mismatch"),
1244            "Message should mention indentation: {}",
1245            result[0].message
1246        );
1247        assert!(
1248            result[0].message.contains("Another"),
1249            "Message should mention the entry: {}",
1250            result[0].message
1251        );
1252    }
1253
1254    #[test]
1255    fn test_indentation_mismatch_fixed() {
1256        let rule = create_enabled_rule();
1257        // TOC entries are all at same indentation level, but headings have different levels
1258        let content = r#"<!-- toc -->
1259- [Hello](#hello)
1260- [Another](#another)
1261- [Heading](#heading)
1262<!-- tocstop -->
1263
1264## Hello
1265
1266### Another
1267
1268## Heading
1269"#;
1270        let ctx = create_ctx(content);
1271        let fixed = rule.fix(&ctx).unwrap();
1272        // After fix, "Another" should be indented
1273        assert!(fixed.contains("- [Hello](#hello)"));
1274        assert!(fixed.contains("  - [Another](#another)")); // Indented with 2 spaces
1275        assert!(fixed.contains("- [Heading](#heading)"));
1276    }
1277
1278    #[test]
1279    fn test_no_indentation_mismatch_when_correct() {
1280        let rule = create_enabled_rule();
1281        // TOC has correct indentation
1282        let content = r#"<!-- toc -->
1283- [Hello](#hello)
1284  - [Another](#another)
1285- [Heading](#heading)
1286<!-- tocstop -->
1287
1288## Hello
1289
1290### Another
1291
1292## Heading
1293"#;
1294        let ctx = create_ctx(content);
1295        let result = rule.check(&ctx).unwrap();
1296        // Should not report any issues - indentation is correct
1297        assert!(result.is_empty(), "Should not report issues: {result:?}");
1298    }
1299
1300    // ========== Order Mismatch Tests ==========
1301
1302    #[test]
1303    fn test_order_mismatch_detected() {
1304        let rule = create_enabled_rule();
1305        let content = r#"# Title
1306
1307<!-- toc -->
1308
1309- [Section B](#section-b)
1310- [Section A](#section-a)
1311
1312<!-- tocstop -->
1313
1314## Section A
1315
1316Content A.
1317
1318## Section B
1319
1320Content B.
1321"#;
1322        let ctx = create_ctx(content);
1323        let result = rule.check(&ctx).unwrap();
1324        // Should detect order mismatch - Section B appears before Section A in TOC
1325        // but Section A comes first in document
1326        assert!(!result.is_empty(), "Should detect order mismatch");
1327    }
1328
1329    #[test]
1330    fn test_order_mismatch_ignored_when_disabled() {
1331        let mut rule = create_enabled_rule();
1332        rule.enforce_order = false;
1333        let content = r#"# Title
1334
1335<!-- toc -->
1336
1337- [Section B](#section-b)
1338- [Section A](#section-a)
1339
1340<!-- tocstop -->
1341
1342## Section A
1343
1344Content A.
1345
1346## Section B
1347
1348Content B.
1349"#;
1350        let ctx = create_ctx(content);
1351        let result = rule.check(&ctx).unwrap();
1352        // With enforce_order=false, order mismatches should be ignored
1353        assert!(result.is_empty(), "Should not report order mismatch when disabled");
1354    }
1355
1356    // ========== Unicode and Special Characters Tests ==========
1357
1358    #[test]
1359    fn test_unicode_headings() {
1360        let rule = create_enabled_rule();
1361        let content = r#"# Title
1362
1363<!-- toc -->
1364
1365- [日本語の見出し](#日本語の見出し)
1366- [Émojis 🎉](#émojis-)
1367
1368<!-- tocstop -->
1369
1370## 日本語の見出し
1371
1372Japanese content.
1373
1374## Émojis 🎉
1375
1376Content with emojis.
1377"#;
1378        let ctx = create_ctx(content);
1379        let result = rule.check(&ctx).unwrap();
1380        // Should handle unicode correctly
1381        assert!(result.is_empty(), "Should handle unicode headings");
1382    }
1383
1384    #[test]
1385    fn test_special_characters_in_headings() {
1386        let rule = create_enabled_rule();
1387        let content = r#"# Title
1388
1389<!-- toc -->
1390
1391- [What's New?](#whats-new)
1392- [C++ Guide](#c-guide)
1393
1394<!-- tocstop -->
1395
1396## What's New?
1397
1398News content.
1399
1400## C++ Guide
1401
1402C++ content.
1403"#;
1404        let ctx = create_ctx(content);
1405        let result = rule.check(&ctx).unwrap();
1406        assert!(result.is_empty(), "Should handle special characters");
1407    }
1408
1409    #[test]
1410    fn test_code_spans_in_headings() {
1411        let rule = create_enabled_rule();
1412        let content = r#"# Title
1413
1414<!-- toc -->
1415
1416- [`check [PATHS...]`](#check-paths)
1417
1418<!-- tocstop -->
1419
1420## `check [PATHS...]`
1421
1422Command documentation.
1423"#;
1424        let ctx = create_ctx(content);
1425        let result = rule.check(&ctx).unwrap();
1426        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
1427    }
1428
1429    // ========== Config Tests ==========
1430
1431    #[test]
1432    fn test_from_config_defaults() {
1433        let config = crate::config::Config::default();
1434        let rule = MD073TocValidation::from_config(&config);
1435        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1436
1437        assert_eq!(rule.min_level, 2);
1438        assert_eq!(rule.max_level, 4);
1439        assert!(rule.enforce_order);
1440        assert_eq!(rule.indent, 2);
1441    }
1442
1443    #[test]
1444    fn test_indent_from_md007_config() {
1445        use crate::config::{Config, RuleConfig};
1446        use std::collections::BTreeMap;
1447
1448        let mut config = Config::default();
1449
1450        // Set MD007 indent to 4
1451        let mut md007_values = BTreeMap::new();
1452        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1453        config.rules.insert(
1454            "MD007".to_string(),
1455            RuleConfig {
1456                severity: None,
1457                values: md007_values,
1458            },
1459        );
1460
1461        let rule = MD073TocValidation::from_config(&config);
1462        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1463
1464        assert_eq!(rule.indent, 4, "Should read indent from MD007 config");
1465    }
1466
1467    #[test]
1468    fn test_indent_md073_overrides_md007() {
1469        use crate::config::{Config, RuleConfig};
1470        use std::collections::BTreeMap;
1471
1472        let mut config = Config::default();
1473
1474        // Set MD007 indent to 4
1475        let mut md007_values = BTreeMap::new();
1476        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1477        config.rules.insert(
1478            "MD007".to_string(),
1479            RuleConfig {
1480                severity: None,
1481                values: md007_values,
1482            },
1483        );
1484
1485        // Set MD073 indent to 3 (should override MD007)
1486        let mut md073_values = BTreeMap::new();
1487        md073_values.insert("enabled".to_string(), toml::Value::Boolean(true));
1488        md073_values.insert("indent".to_string(), toml::Value::Integer(3));
1489        config.rules.insert(
1490            "MD073".to_string(),
1491            RuleConfig {
1492                severity: None,
1493                values: md073_values,
1494            },
1495        );
1496
1497        let rule = MD073TocValidation::from_config(&config);
1498        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1499
1500        assert_eq!(rule.indent, 3, "MD073 indent should override MD007");
1501    }
1502
1503    #[test]
1504    fn test_generate_toc_with_4_space_indent() {
1505        let mut rule = create_enabled_rule();
1506        rule.indent = 4;
1507
1508        let content = r#"<!-- toc -->
1509
1510<!-- tocstop -->
1511
1512## Level 2
1513
1514### Level 3
1515
1516#### Level 4
1517
1518## Another Level 2
1519"#;
1520        let ctx = create_ctx(content);
1521        let region = rule.detect_toc_region(&ctx).unwrap();
1522        let expected = rule.build_expected_toc(&ctx, &region);
1523        let toc = rule.generate_toc(&expected);
1524
1525        // With 4-space indent:
1526        // Level 2 = 0 spaces (base level)
1527        // Level 3 = 4 spaces
1528        // Level 4 = 8 spaces
1529        assert!(toc.contains("- [Level 2](#level-2)"), "Level 2 should have no indent");
1530        assert!(
1531            toc.contains("    - [Level 3](#level-3)"),
1532            "Level 3 should have 4-space indent"
1533        );
1534        assert!(
1535            toc.contains("        - [Level 4](#level-4)"),
1536            "Level 4 should have 8-space indent"
1537        );
1538        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1539    }
1540
1541    #[test]
1542    fn test_validate_toc_with_4_space_indent() {
1543        let mut rule = create_enabled_rule();
1544        rule.indent = 4;
1545
1546        // TOC with correct 4-space indentation
1547        let content = r#"<!-- toc -->
1548- [Hello](#hello)
1549    - [Another](#another)
1550- [Heading](#heading)
1551<!-- tocstop -->
1552
1553## Hello
1554
1555### Another
1556
1557## Heading
1558"#;
1559        let ctx = create_ctx(content);
1560        let result = rule.check(&ctx).unwrap();
1561        assert!(
1562            result.is_empty(),
1563            "Should accept 4-space indent when configured: {result:?}"
1564        );
1565    }
1566
1567    #[test]
1568    fn test_validate_toc_wrong_indent_with_4_space_config() {
1569        let mut rule = create_enabled_rule();
1570        rule.indent = 4;
1571
1572        // TOC with 2-space indentation (wrong when 4-space is configured)
1573        let content = r#"<!-- toc -->
1574- [Hello](#hello)
1575  - [Another](#another)
1576- [Heading](#heading)
1577<!-- tocstop -->
1578
1579## Hello
1580
1581### Another
1582
1583## Heading
1584"#;
1585        let ctx = create_ctx(content);
1586        let result = rule.check(&ctx).unwrap();
1587        assert_eq!(result.len(), 1, "Should detect wrong indent");
1588        assert!(
1589            result[0].message.contains("Indentation mismatch"),
1590            "Should report indentation mismatch: {}",
1591            result[0].message
1592        );
1593        assert!(
1594            result[0].message.contains("expected 4 spaces"),
1595            "Should mention expected 4 spaces: {}",
1596            result[0].message
1597        );
1598    }
1599
1600    // ========== Markdown Stripping Tests ==========
1601
1602    #[test]
1603    fn test_strip_markdown_formatting_link() {
1604        let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1605        assert_eq!(result, "Tool: terminal");
1606    }
1607
1608    #[test]
1609    fn test_strip_markdown_formatting_bold() {
1610        let result = strip_markdown_formatting("This is **bold** text");
1611        assert_eq!(result, "This is bold text");
1612
1613        let result = strip_markdown_formatting("This is __bold__ text");
1614        assert_eq!(result, "This is bold text");
1615    }
1616
1617    #[test]
1618    fn test_strip_markdown_formatting_italic() {
1619        let result = strip_markdown_formatting("This is *italic* text");
1620        assert_eq!(result, "This is italic text");
1621
1622        let result = strip_markdown_formatting("This is _italic_ text");
1623        assert_eq!(result, "This is italic text");
1624    }
1625
1626    #[test]
1627    fn test_strip_markdown_formatting_code_span() {
1628        let result = strip_markdown_formatting("Use the `format` function");
1629        assert_eq!(result, "Use the format function");
1630    }
1631
1632    #[test]
1633    fn test_strip_markdown_formatting_image() {
1634        let result = strip_markdown_formatting("See ![logo](image.png) for details");
1635        assert_eq!(result, "See logo for details");
1636    }
1637
1638    #[test]
1639    fn test_strip_markdown_formatting_reference_link() {
1640        let result = strip_markdown_formatting("See [documentation][docs] for details");
1641        assert_eq!(result, "See documentation for details");
1642    }
1643
1644    #[test]
1645    fn test_strip_markdown_formatting_combined() {
1646        // Link is stripped first, leaving bold, then bold is stripped
1647        let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1648        assert_eq!(result, "Tool: terminal");
1649    }
1650
1651    #[test]
1652    fn test_toc_with_link_in_heading_matches_stripped_text() {
1653        let rule = create_enabled_rule();
1654
1655        // TOC entry text matches the stripped heading text
1656        let content = r#"# Title
1657
1658<!-- toc -->
1659
1660- [Tool: terminal](#tool-terminal)
1661
1662<!-- tocstop -->
1663
1664## Tool: [terminal](https://example.com)
1665
1666Content here.
1667"#;
1668        let ctx = create_ctx(content);
1669        let result = rule.check(&ctx).unwrap();
1670        assert!(
1671            result.is_empty(),
1672            "Stripped heading text should match TOC entry: {result:?}"
1673        );
1674    }
1675
1676    #[test]
1677    fn test_toc_with_simplified_text_still_mismatches() {
1678        let rule = create_enabled_rule();
1679
1680        // TOC entry "terminal" does NOT match stripped heading "Tool: terminal"
1681        let content = r#"# Title
1682
1683<!-- toc -->
1684
1685- [terminal](#tool-terminal)
1686
1687<!-- tocstop -->
1688
1689## Tool: [terminal](https://example.com)
1690
1691Content here.
1692"#;
1693        let ctx = create_ctx(content);
1694        let result = rule.check(&ctx).unwrap();
1695        assert_eq!(result.len(), 1, "Should report text mismatch");
1696        assert!(result[0].message.contains("Text mismatch"));
1697    }
1698
1699    #[test]
1700    fn test_fix_generates_stripped_toc_entries() {
1701        let rule = MD073TocValidation::new();
1702        let content = r#"# Title
1703
1704<!-- toc -->
1705
1706<!-- tocstop -->
1707
1708## Tool: [busybox](https://www.busybox.net/)
1709
1710Content.
1711
1712## Tool: [mount](https://en.wikipedia.org/wiki/Mount)
1713
1714More content.
1715"#;
1716        let ctx = create_ctx(content);
1717        let fixed = rule.fix(&ctx).unwrap();
1718
1719        // Generated TOC should have stripped text (links removed)
1720        assert!(
1721            fixed.contains("- [Tool: busybox](#tool-busybox)"),
1722            "TOC entry should have stripped link text"
1723        );
1724        assert!(
1725            fixed.contains("- [Tool: mount](#tool-mount)"),
1726            "TOC entry should have stripped link text"
1727        );
1728        // TOC entries should NOT contain the URL (the actual headings in the document still will)
1729        // Check only within the TOC region (between toc markers)
1730        let toc_start = fixed.find("<!-- toc -->").unwrap();
1731        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
1732        let toc_content = &fixed[toc_start..toc_end];
1733        assert!(
1734            !toc_content.contains("busybox.net"),
1735            "TOC should not contain URLs: {toc_content}"
1736        );
1737        assert!(
1738            !toc_content.contains("wikipedia.org"),
1739            "TOC should not contain URLs: {toc_content}"
1740        );
1741    }
1742
1743    #[test]
1744    fn test_fix_with_bold_in_heading() {
1745        let rule = MD073TocValidation::new();
1746        let content = r#"# Title
1747
1748<!-- toc -->
1749
1750<!-- tocstop -->
1751
1752## **Important** Section
1753
1754Content.
1755"#;
1756        let ctx = create_ctx(content);
1757        let fixed = rule.fix(&ctx).unwrap();
1758
1759        // Generated TOC should have stripped text (bold markers removed)
1760        assert!(fixed.contains("- [Important Section](#important-section)"));
1761    }
1762
1763    #[test]
1764    fn test_fix_with_code_in_heading() {
1765        let rule = MD073TocValidation::new();
1766        let content = r#"# Title
1767
1768<!-- toc -->
1769
1770<!-- tocstop -->
1771
1772## Using `async` Functions
1773
1774Content.
1775"#;
1776        let ctx = create_ctx(content);
1777        let fixed = rule.fix(&ctx).unwrap();
1778
1779        // Generated TOC should have stripped text (backticks removed)
1780        assert!(fixed.contains("- [Using async Functions](#using-async-functions)"));
1781    }
1782
1783    // ========== Custom Anchor Tests ==========
1784
1785    #[test]
1786    fn test_custom_anchor_id_respected() {
1787        let rule = create_enabled_rule();
1788        let content = r#"# Title
1789
1790<!-- toc -->
1791
1792- [My Section](#my-custom-anchor)
1793
1794<!-- tocstop -->
1795
1796## My Section {#my-custom-anchor}
1797
1798Content here.
1799"#;
1800        let ctx = create_ctx(content);
1801        let result = rule.check(&ctx).unwrap();
1802        assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
1803    }
1804
1805    #[test]
1806    fn test_custom_anchor_id_in_generated_toc() {
1807        let rule = create_enabled_rule();
1808        let content = r#"# Title
1809
1810<!-- toc -->
1811
1812<!-- tocstop -->
1813
1814## First Section {#custom-first}
1815
1816Content.
1817
1818## Second Section {#another-custom}
1819
1820More content.
1821"#;
1822        let ctx = create_ctx(content);
1823        let fixed = rule.fix(&ctx).unwrap();
1824        assert!(fixed.contains("- [First Section](#custom-first)"));
1825        assert!(fixed.contains("- [Second Section](#another-custom)"));
1826    }
1827
1828    #[test]
1829    fn test_mixed_custom_and_generated_anchors() {
1830        let rule = create_enabled_rule();
1831        let content = r#"# Title
1832
1833<!-- toc -->
1834
1835- [Custom Section](#my-id)
1836- [Normal Section](#normal-section)
1837
1838<!-- tocstop -->
1839
1840## Custom Section {#my-id}
1841
1842Content.
1843
1844## Normal Section
1845
1846More content.
1847"#;
1848        let ctx = create_ctx(content);
1849        let result = rule.check(&ctx).unwrap();
1850        assert!(result.is_empty(), "Should handle mixed custom and generated anchors");
1851    }
1852
1853    // ========== Anchor Generation Tests ==========
1854
1855    #[test]
1856    fn test_github_anchor_style() {
1857        let rule = create_enabled_rule();
1858
1859        let content = r#"<!-- toc -->
1860
1861<!-- tocstop -->
1862
1863## Test_With_Underscores
1864
1865Content.
1866"#;
1867        let ctx = create_ctx(content);
1868        let region = rule.detect_toc_region(&ctx).unwrap();
1869        let expected = rule.build_expected_toc(&ctx, &region);
1870
1871        // GitHub-style anchors preserve underscores
1872        assert_eq!(expected[0].anchor, "test_with_underscores");
1873    }
1874
1875    // ========== Stress Tests ==========
1876
1877    #[test]
1878    fn test_stress_many_headings() {
1879        let rule = create_enabled_rule();
1880
1881        // Generate a document with 150 headings
1882        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1883
1884        for i in 1..=150 {
1885            content.push_str(&format!("## Heading Number {i}\n\nContent for section {i}.\n\n"));
1886        }
1887
1888        let ctx = create_ctx(&content);
1889
1890        // Should not panic or timeout
1891        let result = rule.check(&ctx).unwrap();
1892
1893        // Should report missing entries for all 150 headings
1894        assert_eq!(result.len(), 1, "Should report single warning for TOC");
1895        assert!(result[0].message.contains("Missing entry"));
1896
1897        // Fix should generate TOC with 150 entries
1898        let fixed = rule.fix(&ctx).unwrap();
1899        assert!(fixed.contains("- [Heading Number 1](#heading-number-1)"));
1900        assert!(fixed.contains("- [Heading Number 100](#heading-number-100)"));
1901        assert!(fixed.contains("- [Heading Number 150](#heading-number-150)"));
1902    }
1903
1904    #[test]
1905    fn test_stress_deeply_nested() {
1906        let rule = create_enabled_rule();
1907        let content = r#"# Title
1908
1909<!-- toc -->
1910
1911<!-- tocstop -->
1912
1913## Level 2 A
1914
1915### Level 3 A
1916
1917#### Level 4 A
1918
1919## Level 2 B
1920
1921### Level 3 B
1922
1923#### Level 4 B
1924
1925## Level 2 C
1926
1927### Level 3 C
1928
1929#### Level 4 C
1930
1931## Level 2 D
1932
1933### Level 3 D
1934
1935#### Level 4 D
1936"#;
1937        let ctx = create_ctx(content);
1938        let fixed = rule.fix(&ctx).unwrap();
1939
1940        // Check nested indentation is correct
1941        assert!(fixed.contains("- [Level 2 A](#level-2-a)"));
1942        assert!(fixed.contains("  - [Level 3 A](#level-3-a)"));
1943        assert!(fixed.contains("    - [Level 4 A](#level-4-a)"));
1944        assert!(fixed.contains("- [Level 2 D](#level-2-d)"));
1945        assert!(fixed.contains("  - [Level 3 D](#level-3-d)"));
1946        assert!(fixed.contains("    - [Level 4 D](#level-4-d)"));
1947    }
1948
1949    // ==================== Duplicate TOC anchors ====================
1950
1951    #[test]
1952    fn test_duplicate_toc_anchors_produce_correct_diagnostics() {
1953        let rule = create_enabled_rule();
1954        // Document has headings "Example", "Another", "Example" which produce anchors:
1955        // "example", "another", "example-1"
1956        // TOC incorrectly uses #example twice instead of #example and #example-1
1957        let content = r#"# Document
1958
1959<!-- toc -->
1960
1961- [Example](#example)
1962- [Another](#another)
1963- [Example](#example)
1964
1965<!-- tocstop -->
1966
1967## Example
1968First.
1969
1970## Another
1971Middle.
1972
1973## Example
1974Second.
1975"#;
1976        let ctx = create_ctx(content);
1977        let result = rule.check(&ctx).unwrap();
1978
1979        // The TOC has #example twice but expected has #example and #example-1.
1980        // Should report that #example-1 is missing from the TOC.
1981        assert!(!result.is_empty(), "Should detect mismatch with duplicate TOC anchors");
1982        assert!(
1983            result[0].message.contains("Missing entry") || result[0].message.contains("Stale entry"),
1984            "Should report missing or stale entries for duplicate anchors. Got: {}",
1985            result[0].message
1986        );
1987    }
1988
1989    // ==================== Multi-backtick code spans ====================
1990
1991    #[test]
1992    fn test_strip_double_backtick_code_span() {
1993        // Double-backtick code spans should be stripped
1994        let result = strip_markdown_formatting("Using ``code with ` backtick``");
1995        assert_eq!(
1996            result, "Using code with ` backtick",
1997            "Should strip double-backtick code spans"
1998        );
1999    }
2000
2001    #[test]
2002    fn test_strip_triple_backtick_code_span() {
2003        // Triple-backtick code spans should be stripped
2004        let result = strip_markdown_formatting("Using ```code with `` backticks```");
2005        assert_eq!(
2006            result, "Using code with `` backticks",
2007            "Should strip triple-backtick code spans"
2008        );
2009    }
2010
2011    #[test]
2012    fn test_toc_with_double_backtick_heading() {
2013        let rule = create_enabled_rule();
2014        let content = r#"# Title
2015
2016<!-- toc -->
2017
2018- [Using code with backtick](#using-code-with-backtick)
2019
2020<!-- tocstop -->
2021
2022## Using ``code with ` backtick``
2023
2024Content here.
2025"#;
2026        let ctx = create_ctx(content);
2027        // The heading uses double-backtick code span: ``code with ` backtick``
2028        // After stripping, heading text = "Using code with ` backtick"
2029        // The fix should produce a TOC entry with the stripped text
2030        let fixed = rule.fix(&ctx).unwrap();
2031        // The generated TOC should have the stripped heading text
2032        assert!(
2033            fixed.contains("code with ` backtick") || fixed.contains("code with backtick"),
2034            "Fix should strip double-backtick code span from heading. Got TOC: {}",
2035            &fixed[fixed.find("<!-- toc -->").unwrap()..fixed.find("<!-- tocstop -->").unwrap()]
2036        );
2037    }
2038
2039    #[test]
2040    fn test_stress_many_duplicates() {
2041        let rule = create_enabled_rule();
2042
2043        // Generate 50 headings with the same text
2044        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
2045        for _ in 0..50 {
2046            content.push_str("## FAQ\n\nContent.\n\n");
2047        }
2048
2049        let ctx = create_ctx(&content);
2050        let region = rule.detect_toc_region(&ctx).unwrap();
2051        let expected = rule.build_expected_toc(&ctx, &region);
2052
2053        // Should generate unique anchors for all 50
2054        assert_eq!(expected.len(), 50);
2055        assert_eq!(expected[0].anchor, "faq");
2056        assert_eq!(expected[1].anchor, "faq-1");
2057        assert_eq!(expected[49].anchor, "faq-49");
2058    }
2059
2060    /// Core invariant: for every warning with a Fix, fix() must produce
2061    /// output consistent with applying that fix directly.
2062    #[test]
2063    fn test_roundtrip_check_and_fix_alignment() {
2064        let rule = create_enabled_rule();
2065
2066        let inputs = [
2067            // Stale entry
2068            "# Title\n\n<!-- toc -->\n- [Old Section](#old-section)\n<!-- tocstop -->\n\n## New Section\n",
2069            // Missing entry
2070            "# Title\n\n<!-- toc -->\n<!-- tocstop -->\n\n## One\n\n## Two\n",
2071            // Text mismatch
2072            "# Title\n\n<!-- toc -->\n- [Wrong Text](#real-section)\n<!-- tocstop -->\n\n## Real Section\n",
2073            // Already correct (no warnings, no change)
2074            "# Title\n\n<!-- toc -->\n- [One](#one)\n- [Two](#two)\n<!-- tocstop -->\n\n## One\n\n## Two\n",
2075        ];
2076
2077        for input in &inputs {
2078            let ctx = create_ctx(input);
2079            let fixed = rule.fix(&ctx).unwrap();
2080
2081            // Idempotency: fix(fix(x)) == fix(x)
2082            let ctx2 = create_ctx(&fixed);
2083            let fixed_twice = rule.fix(&ctx2).unwrap();
2084            assert_eq!(
2085                fixed, fixed_twice,
2086                "fix() is not idempotent for input: {input:?}\nfirst:  {fixed:?}\nsecond: {fixed_twice:?}"
2087            );
2088
2089            // After fix, check() should produce no warnings
2090            let warnings_after = rule.check(&ctx2).unwrap();
2091            assert!(
2092                warnings_after.is_empty(),
2093                "check() should return no warnings after fix() for input: {input:?}\nfixed: {fixed:?}\nwarnings: {warnings_after:?}"
2094            );
2095        }
2096    }
2097
2098    /// If a TOC has no mismatches, check() emits no warnings and fix()
2099    /// returns content unchanged.
2100    #[test]
2101    fn test_no_mismatch_preserves_content() {
2102        let rule = create_enabled_rule();
2103
2104        let content = "# Title\n\n<!-- toc -->\n- [First Section](#first-section)\n- [Second Section](#second-section)\n<!-- tocstop -->\n\n## First Section\n\ntext\n\n## Second Section\n\ntext\n";
2105        let ctx = create_ctx(content);
2106
2107        let warnings = rule.check(&ctx).unwrap();
2108        assert!(warnings.is_empty(), "No mismatches should emit no warnings");
2109
2110        let fixed = rule.fix(&ctx).unwrap();
2111        assert_eq!(fixed, content, "Content should be unchanged when TOC matches headings");
2112    }
2113
2114    /// Inline-disabled TOC should not be modified by fix().
2115    #[test]
2116    fn test_inline_disable_preserves_toc() {
2117        let rule = create_enabled_rule();
2118
2119        // TOC with a stale entry, but MD073 disabled for the TOC region
2120        let content = "# Title\n\n<!-- rumdl-disable MD073 -->\n<!-- toc -->\n- [Stale](#stale)\n<!-- tocstop -->\n<!-- rumdl-enable MD073 -->\n\n## Real\n";
2121        let ctx = create_ctx(content);
2122
2123        let fixed = rule.fix(&ctx).unwrap();
2124        assert_eq!(fixed, content, "TOC in a disabled region should be preserved exactly");
2125    }
2126}