Skip to main content

rumdl_lib/rules/
md073_toc_validation.rs

1//! MD073: Table of Contents validation rule
2//!
3//! Validates that TOC sections match the actual document headings.
4
5use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
/// Matches the opening TOC marker `<!-- toc -->`.
///
/// Case-insensitive; any amount of whitespace (including none) may surround `toc`.
static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());

/// Matches the closing TOC marker, either `<!-- tocstop -->` or `<!-- /toc -->`.
///
/// Case-insensitive, with the same whitespace tolerance as the start marker.
static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());

/// Matches one TOC list item: `- [text](#anchor)` or `* [text](#anchor)`,
/// with optional leading whitespace for nested items.
///
/// Capture groups:
/// 1. the leading whitespace (the item's indentation),
/// 2. the link text, which may contain one level of nested brackets,
///    as in `[`check [PATHS...]`](#check-paths)`,
/// 3. the anchor without the leading `#`.
static TOC_ENTRY_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// A TOC region detected in the document.
///
/// Line numbers are 1-indexed. The byte range `content_start..content_end` is
/// the span that a fix replaces with the regenerated TOC.
#[derive(Debug, Clone)]
struct TocRegion {
    /// 1-indexed start line of the TOC content (the line after the start marker)
    start_line: usize,
    /// 1-indexed end line of the TOC content (the line before the stop marker);
    /// set equal to `start_line` when nothing lies between the markers
    end_line: usize,
    /// Byte offset where TOC content starts
    content_start: usize,
    /// Byte offset where TOC content ends (the start of the stop-marker line);
    /// equal to `content_start` for an empty region
    content_end: usize,
}
36
/// A TOC entry parsed from a list item inside the existing TOC region.
#[derive(Debug, Clone)]
struct TocEntry {
    /// Display text of the link, exactly as written between the brackets
    text: String,
    /// Anchor/fragment (without #)
    anchor: String,
    /// Length of the leading whitespace before the list marker, used for the
    /// indentation check (measured as the byte length of the captured whitespace)
    indent_spaces: usize,
}
47
/// A TOC entry that should exist, derived from a document heading.
#[derive(Debug, Clone)]
struct ExpectedTocEntry {
    /// 1-indexed line number of the heading
    heading_line: usize,
    /// Heading level (1-6)
    level: u8,
    /// Heading text as written (markdown formatting is stripped only when
    /// comparing or rendering)
    text: String,
    /// Anchor for the heading: its custom ID when present, otherwise a generated
    /// GitHub-style fragment, with a numeric suffix for duplicates
    anchor: String,
}
60
/// The kinds of disagreement between the TOC found in the document and the TOC
/// expected from its headings.
#[derive(Debug)]
enum TocMismatch {
    /// Entry exists in TOC but heading doesn't exist
    StaleEntry { entry: TocEntry },
    /// Heading exists but no TOC entry for it
    MissingEntry { expected: ExpectedTocEntry },
    /// TOC entry text doesn't match heading text (compared after stripping
    /// markdown formatting from both sides)
    TextMismatch {
        entry: TocEntry,
        expected: ExpectedTocEntry,
    },
    /// TOC entries are in wrong order; `expected_position` is the 1-based
    /// position the entry has in the expected TOC
    OrderMismatch { entry: TocEntry, expected_position: usize },
    /// TOC entry has wrong indentation level; both indents are counts of
    /// leading whitespace
    IndentationMismatch {
        entry: TocEntry,
        actual_indent: usize,
        expected_indent: usize,
    },
}
82
// Regex patterns for stripping markdown formatting from heading text.
// In each pattern, capture group 1 is the text that is kept.

/// Inline link `[text](url)`; group 1 is the link text.
static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
/// Reference link `[text][ref]` (the label may be empty, as in `[text][]`);
/// group 1 is the link text.
static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
/// Inline image `![alt](url)`; group 1 is the alt text, which may be empty.
static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
/// Strip code spans from text, handling multi-backtick spans per the CommonMark spec.
///
/// A code span opens with a run of N backticks and closes at the next run of
/// exactly N backticks. The delimiters are dropped and the content is kept, so a
/// span written with single backticks around `code` yields `code`, and a span
/// delimited by double backticks may itself contain single backticks.
///
/// Following CommonMark:
/// - one leading and one trailing space are removed from the content when both are
///   present, unless the content consists only of spaces;
/// - an opening run with no matching closing run is literal text, and scanning
///   resumes right after that run, so later code spans are still stripped.
///
/// Returns the text with every code span replaced by its content.
fn strip_code_spans(text: &str) -> String {
    // A backtick is a single ASCII byte and never occurs inside a multi-byte UTF-8
    // sequence, so every offset computed below is a valid `str` slice boundary.
    let bytes = text.as_bytes();
    let mut result = String::with_capacity(text.len());
    // Start of the literal text that has not been copied into `result` yet
    let mut pending_start = 0;
    let mut pos = 0;

    while let Some(offset) = bytes[pos..].iter().position(|&b| b == b'`') {
        let open_start = pos + offset;
        let open_end = backtick_run_end(bytes, open_start);

        match find_closing_run(bytes, open_end, open_end - open_start) {
            Some((close_start, close_end)) => {
                result.push_str(&text[pending_start..open_start]);
                result.push_str(trim_code_span_padding(&text[open_end..close_start]));
                pending_start = close_end;
                pos = close_end;
            }
            // No closing run: the opening backticks stay literal (they are copied
            // with the pending text) and scanning resumes just after them.
            None => pos = open_end,
        }
    }

    result.push_str(&text[pending_start..]);
    result
}

/// Return the index just past the run of backticks that starts at `start`.
fn backtick_run_end(bytes: &[u8], start: usize) -> usize {
    start + bytes[start..].iter().take_while(|&&b| b == b'`').count()
}

/// Find the first backtick run of exactly `run_len` backticks at or after `from`,
/// returning its `(start, end)` byte offsets.
fn find_closing_run(bytes: &[u8], from: usize, run_len: usize) -> Option<(usize, usize)> {
    let mut pos = from;
    while let Some(offset) = bytes[pos..].iter().position(|&b| b == b'`') {
        let start = pos + offset;
        let end = backtick_run_end(bytes, start);
        if end - start == run_len {
            return Some((start, end));
        }
        // A run of a different length cannot close this span; skip it whole
        pos = end;
    }
    None
}

/// Remove one leading and one trailing space from code span content when both are
/// present and the content is not made up solely of spaces.
fn trim_code_span_padding(content: &str) -> &str {
    if content.bytes().all(|b| b == b' ') {
        return content;
    }
    content
        .strip_prefix(' ')
        .and_then(|rest| rest.strip_suffix(' '))
        .unwrap_or(content)
}
/// Bold written as `**text**`; group 1 is the inner text.
static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
/// Bold written as `__text__`; group 1 is the inner text.
static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
/// Italic written as `*text*`; group 1 is the inner text.
static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
// Match underscore italic at word boundaries (space or start/end)
// Handles: "_text_", " _text_ ", "start _text_", "_text_ end"
// Groups 1 and 3 capture the boundary characters on either side (possibly empty
// at the start/end of the text) and group 2 the inner text, so a replacement can
// put the boundaries back.
static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
154
155/// Strip markdown formatting from text, preserving plain text content.
156/// Used for TOC entry display text.
157///
158/// Examples:
159/// - `[terminal](url)` → `terminal`
160/// - `**bold**` → `bold`
161/// - `` `code` `` → `code`
162/// - `Tool: [terminal](url)` → `Tool: terminal`
163fn strip_markdown_formatting(text: &str) -> String {
164    let mut result = text.to_string();
165
166    // Strip images first (before links, since images use similar syntax)
167    result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
168
169    // Strip links: [text](url) → text
170    result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
171
172    // Strip reference links: [text][ref] → text
173    result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
174
175    // Strip code spans (handles multi-backtick spans like ``code with ` backtick``)
176    result = strip_code_spans(&result);
177
178    // Strip bold (do double before single to handle nested)
179    result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
180    result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
181
182    // Strip italic
183    result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
184    // Underscore italic: preserve boundary chars, extract content
185    result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
186
187    result
188}
189
/// MD073: Table of Contents Validation
///
/// This rule validates that TOC sections match the actual document headings.
/// It detects TOC regions via markers (`<!-- toc -->...<!-- tocstop -->`);
/// `<!-- /toc -->` is also accepted as the stop marker.
///
/// To opt into TOC validation, add markers to your document:
/// ```markdown
/// <!-- toc -->
/// - [Section](#section)
/// <!-- tocstop -->
/// ```
///
/// ## Configuration
///
/// ```toml
/// [MD073]
/// # Enable the rule (opt-in, disabled by default)
/// enabled = true
/// # Minimum heading level to include (default: 2)
/// min-level = 2
/// # Maximum heading level to include (default: 4)
/// max-level = 4
/// # Whether TOC order must match document order (default: true)
/// enforce-order = true
/// # Indent size per nesting level (default: from MD007 config, or 2)
/// indent = 2
/// ```
#[derive(Clone)]
pub struct MD073TocValidation {
    /// Whether this rule is enabled (default: false - opt-in rule)
    // NOTE(review): in the code visible here this flag is stored and printed by
    // `Debug` but not consulted by `check`/`should_skip`; presumably the rule
    // runner honours it - confirm.
    enabled: bool,
    /// Minimum heading level to include
    min_level: u8,
    /// Maximum heading level to include
    max_level: u8,
    /// Whether to enforce order matching
    enforce_order: bool,
    /// Indent size per nesting level (reads from MD007 config by default)
    pub indent: usize,
}
230
/// Defaults: rule disabled, headings of level 2 through 4 included, order
/// enforced, two spaces of indentation per nesting level.
impl Default for MD073TocValidation {
    fn default() -> Self {
        Self {
            enabled: false, // Disabled by default - opt-in rule
            min_level: 2,
            max_level: 4,
            enforce_order: true,
            indent: 2, // Default indent, can be overridden by MD007 config
        }
    }
}
242
/// Hand-written `Debug` that prints every configuration field by name.
impl std::fmt::Debug for MD073TocValidation {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("MD073TocValidation")
            .field("enabled", &self.enabled)
            .field("min_level", &self.min_level)
            .field("max_level", &self.max_level)
            .field("enforce_order", &self.enforce_order)
            .field("indent", &self.indent)
            .finish()
    }
}
254
255impl MD073TocValidation {
    /// Create a new rule with default settings.
    ///
    /// Equivalent to `Self::default()`, so the returned rule has `enabled` set
    /// to `false`.
    pub fn new() -> Self {
        Self::default()
    }
260
261    /// Detect TOC region using markers
262    fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
263        let mut start_line = None;
264        let mut start_byte = None;
265
266        for (idx, line_info) in ctx.lines.iter().enumerate() {
267            let line_num = idx + 1;
268            let content = line_info.content(ctx.content);
269
270            // Skip if in code block or front matter
271            if line_info.in_code_block || line_info.in_front_matter {
272                continue;
273            }
274
275            // Look for start marker or stop marker
276            if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
277                // We have a start, now look for stop marker
278                if TOC_STOP_MARKER.is_match(content) {
279                    let end_line = line_num - 1;
280                    let content_end = line_info.byte_offset;
281
282                    // Handle case where there's no content between markers
283                    if end_line < s_line {
284                        return Some(TocRegion {
285                            start_line: s_line,
286                            end_line: s_line,
287                            content_start: s_byte,
288                            content_end: s_byte,
289                        });
290                    }
291
292                    return Some(TocRegion {
293                        start_line: s_line,
294                        end_line,
295                        content_start: s_byte,
296                        content_end,
297                    });
298                }
299            } else if TOC_START_MARKER.is_match(content) {
300                // TOC content starts on the next line
301                if idx + 1 < ctx.lines.len() {
302                    start_line = Some(line_num + 1);
303                    start_byte = Some(ctx.lines[idx + 1].byte_offset);
304                }
305            }
306        }
307
308        None
309    }
310
    /// Detect the TOC region of the document.
    ///
    /// Delegates to marker-based detection, the only strategy used here.
    fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
        self.detect_by_markers(ctx)
    }
315
316    /// Extract TOC entries from the detected region
317    fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
318        let mut entries = Vec::new();
319
320        for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
321            let line_info = &ctx.lines[idx];
322            let content = line_info.content(ctx.content);
323
324            if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
325                let indent_spaces = caps.get(1).map_or(0, |m| m.as_str().len());
326                let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
327                let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
328
329                entries.push(TocEntry {
330                    text,
331                    anchor,
332                    indent_spaces,
333                });
334            }
335        }
336
337        entries
338    }
339
340    /// Build expected TOC entries from document headings
341    fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
342        let mut entries = Vec::new();
343        let mut fragment_counts: HashMap<String, usize> = HashMap::new();
344
345        for (idx, line_info) in ctx.lines.iter().enumerate() {
346            let line_num = idx + 1;
347
348            // Skip headings before/within the TOC region
349            if line_num <= toc_region.end_line {
350                // Also skip the TOC heading itself for heading-based detection
351                continue;
352            }
353
354            // Skip code blocks, front matter, HTML blocks
355            if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
356                continue;
357            }
358
359            if let Some(heading) = &line_info.heading {
360                // Filter by min/max level
361                if heading.level < self.min_level || heading.level > self.max_level {
362                    continue;
363                }
364
365                // Use custom ID if available, otherwise generate GitHub-style anchor
366                let base_anchor = if let Some(custom_id) = &heading.custom_id {
367                    custom_id.clone()
368                } else {
369                    AnchorStyle::GitHub.generate_fragment(&heading.text)
370                };
371
372                // Handle duplicate anchors
373                let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
374                    let suffix = *count;
375                    *count += 1;
376                    format!("{base_anchor}-{suffix}")
377                } else {
378                    fragment_counts.insert(base_anchor.clone(), 1);
379                    base_anchor
380                };
381
382                entries.push(ExpectedTocEntry {
383                    heading_line: line_num,
384                    level: heading.level,
385                    text: heading.text.clone(),
386                    anchor,
387                });
388            }
389        }
390
391        entries
392    }
393
394    /// Compare actual TOC entries against expected and find mismatches
395    fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
396        let mut mismatches = Vec::new();
397
398        // Build a map of expected anchors
399        let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
400            expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
401
402        // Count actual anchors (handles duplicate anchors in TOC)
403        let mut actual_anchor_counts: HashMap<&str, usize> = HashMap::new();
404        for entry in actual {
405            *actual_anchor_counts.entry(entry.anchor.as_str()).or_insert(0) += 1;
406        }
407
408        // Count expected anchors
409        let mut expected_anchor_counts: HashMap<&str, usize> = HashMap::new();
410        for exp in expected {
411            *expected_anchor_counts.entry(exp.anchor.as_str()).or_insert(0) += 1;
412        }
413
414        // Check for stale entries (in TOC but not in expected, accounting for counts)
415        let mut stale_anchor_counts: HashMap<&str, usize> = HashMap::new();
416        for entry in actual {
417            let actual_count = actual_anchor_counts.get(entry.anchor.as_str()).copied().unwrap_or(0);
418            let expected_count = expected_anchor_counts.get(entry.anchor.as_str()).copied().unwrap_or(0);
419            if actual_count > expected_count {
420                let reported = stale_anchor_counts.entry(entry.anchor.as_str()).or_insert(0);
421                if *reported < actual_count - expected_count {
422                    *reported += 1;
423                    mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
424                }
425            }
426        }
427
428        // Check for missing entries (in expected but not in TOC, accounting for counts)
429        let mut missing_anchor_counts: HashMap<&str, usize> = HashMap::new();
430        for exp in expected {
431            let actual_count = actual_anchor_counts.get(exp.anchor.as_str()).copied().unwrap_or(0);
432            let expected_count = expected_anchor_counts.get(exp.anchor.as_str()).copied().unwrap_or(0);
433            if expected_count > actual_count {
434                let reported = missing_anchor_counts.entry(exp.anchor.as_str()).or_insert(0);
435                if *reported < expected_count - actual_count {
436                    *reported += 1;
437                    mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
438                }
439            }
440        }
441
442        // Check for text mismatches (compare stripped versions)
443        for entry in actual {
444            if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
445                // Compare stripped text (removes markdown formatting like links, emphasis)
446                let actual_stripped = strip_markdown_formatting(entry.text.trim());
447                let expected_stripped = strip_markdown_formatting(exp.text.trim());
448                if actual_stripped != expected_stripped {
449                    mismatches.push(TocMismatch::TextMismatch {
450                        entry: entry.clone(),
451                        expected: (*exp).clone(),
452                    });
453                }
454            }
455        }
456
457        // Check for indentation mismatches
458        // Expected indentation is indent spaces per level difference from base level
459        if !expected.is_empty() {
460            let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
461
462            for entry in actual {
463                if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
464                    let level_diff = exp.level.saturating_sub(base_level) as usize;
465                    let expected_indent = level_diff * self.indent;
466
467                    if entry.indent_spaces != expected_indent {
468                        // Don't report indentation mismatch if already reported as text mismatch
469                        let already_reported = mismatches.iter().any(|m| match m {
470                            TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
471                            TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
472                            _ => false,
473                        });
474                        if !already_reported {
475                            mismatches.push(TocMismatch::IndentationMismatch {
476                                entry: entry.clone(),
477                                actual_indent: entry.indent_spaces,
478                                expected_indent,
479                            });
480                        }
481                    }
482                }
483            }
484        }
485
486        // Check order if enforce_order is enabled
487        if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
488            let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
489
490            // Find entries that exist in both but are out of order
491            let mut expected_idx = 0;
492            for entry in actual {
493                // Skip entries that don't exist in expected
494                if !expected_anchors.contains_key(entry.anchor.as_str()) {
495                    continue;
496                }
497
498                // Find where this anchor should be
499                while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
500                    expected_idx += 1;
501                }
502
503                if expected_idx >= expected_order.len() {
504                    // This entry is after where it should be
505                    let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
506                    // Only add order mismatch if not already reported as stale/text mismatch
507                    let already_reported = mismatches.iter().any(|m| match m {
508                        TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
509                        TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
510                        _ => false,
511                    });
512                    if !already_reported {
513                        mismatches.push(TocMismatch::OrderMismatch {
514                            entry: entry.clone(),
515                            expected_position: correct_pos + 1,
516                        });
517                    }
518                } else {
519                    expected_idx += 1;
520                }
521            }
522        }
523
524        mismatches
525    }
526
527    /// Generate a new TOC from expected entries (always uses nested indentation)
528    fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
529        if expected.is_empty() {
530            return String::new();
531        }
532
533        let mut result = String::new();
534        let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
535        let indent_str = " ".repeat(self.indent);
536
537        for entry in expected {
538            let level_diff = entry.level.saturating_sub(base_level) as usize;
539            let indent = indent_str.repeat(level_diff);
540
541            // Strip markdown formatting from heading text for clean TOC entries
542            let display_text = strip_markdown_formatting(&entry.text);
543            result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
544        }
545
546        result
547    }
548}
549
550impl Rule for MD073TocValidation {
    /// Rule identifier; also the key of this rule's configuration section.
    fn name(&self) -> &'static str {
        "MD073"
    }
554
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Table of Contents should match document headings"
    }
558
559    fn should_skip(&self, ctx: &LintContext) -> bool {
560        // Quick check: skip if no TOC markers
561        let has_toc_marker = ctx.content.contains("<!-- toc") || ctx.content.contains("<!--toc");
562        !has_toc_marker
563    }
564
565    fn check(&self, ctx: &LintContext) -> LintResult {
566        let mut warnings = Vec::new();
567
568        // Detect TOC region
569        let Some(region) = self.detect_toc_region(ctx) else {
570            // No TOC found - nothing to validate
571            return Ok(warnings);
572        };
573
574        // Extract actual TOC entries
575        let actual_entries = self.extract_toc_entries(ctx, &region);
576
577        // Build expected TOC from headings
578        let expected_entries = self.build_expected_toc(ctx, &region);
579
580        // If no expected entries and no actual entries, nothing to validate
581        if expected_entries.is_empty() && actual_entries.is_empty() {
582            return Ok(warnings);
583        }
584
585        // Validate
586        let mismatches = self.validate_toc(&actual_entries, &expected_entries);
587
588        if !mismatches.is_empty() {
589            // Generate a single warning at the TOC region with details
590            let mut details = Vec::new();
591
592            for mismatch in &mismatches {
593                match mismatch {
594                    TocMismatch::StaleEntry { entry } => {
595                        details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
596                    }
597                    TocMismatch::MissingEntry { expected } => {
598                        details.push(format!(
599                            "Missing entry: '{}' (line {})",
600                            expected.text, expected.heading_line
601                        ));
602                    }
603                    TocMismatch::TextMismatch { entry, expected } => {
604                        details.push(format!(
605                            "Text mismatch: TOC has '{}', heading is '{}'",
606                            entry.text, expected.text
607                        ));
608                    }
609                    TocMismatch::OrderMismatch {
610                        entry,
611                        expected_position,
612                    } => {
613                        details.push(format!(
614                            "Order mismatch: '{}' should be at position {}",
615                            entry.text, expected_position
616                        ));
617                    }
618                    TocMismatch::IndentationMismatch {
619                        entry,
620                        actual_indent,
621                        expected_indent,
622                        ..
623                    } => {
624                        details.push(format!(
625                            "Indentation mismatch: '{}' has {} spaces, expected {} spaces",
626                            entry.text, actual_indent, expected_indent
627                        ));
628                    }
629                }
630            }
631
632            let message = format!(
633                "Table of Contents does not match document headings: {}",
634                details.join("; ")
635            );
636
637            // Generate fix: replace entire TOC content
638            let new_toc = self.generate_toc(&expected_entries);
639            let fix_range = region.content_start..region.content_end;
640
641            warnings.push(LintWarning {
642                rule_name: Some(self.name().to_string()),
643                message,
644                line: region.start_line,
645                column: 1,
646                end_line: region.end_line,
647                end_column: 1,
648                severity: Severity::Warning,
649                fix: Some(Fix {
650                    range: fix_range,
651                    replacement: new_toc,
652                }),
653            });
654        }
655
656        Ok(warnings)
657    }
658
659    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
660        // Detect TOC region
661        let Some(region) = self.detect_toc_region(ctx) else {
662            // No TOC found - return unchanged
663            return Ok(ctx.content.to_string());
664        };
665
666        // Skip fix if rule is disabled via inline config at the TOC region
667        if ctx.is_rule_disabled(self.name(), region.start_line) {
668            return Ok(ctx.content.to_string());
669        }
670
671        // Build expected TOC from headings
672        let expected_entries = self.build_expected_toc(ctx, &region);
673
674        // Generate new TOC
675        let new_toc = self.generate_toc(&expected_entries);
676
677        // Replace the TOC content
678        let mut result = String::with_capacity(ctx.content.len());
679        result.push_str(&ctx.content[..region.content_start]);
680        result.push_str(&new_toc);
681        result.push_str(&ctx.content[region.content_end..]);
682
683        Ok(result)
684    }
685
    /// Category this rule is grouped under.
    fn category(&self) -> RuleCategory {
        RuleCategory::Other
    }
689
    /// Expose the rule as `Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
693
694    fn default_config_section(&self) -> Option<(String, toml::Value)> {
695        let value: toml::Value = toml::from_str(
696            r#"
697# Whether this rule is enabled (opt-in, disabled by default)
698enabled = false
699# Minimum heading level to include
700min-level = 2
701# Maximum heading level to include
702max-level = 4
703# Whether TOC order must match document order
704enforce-order = true
705# Indentation per nesting level (defaults to MD007's indent value)
706indent = 2
707"#,
708        )
709        .ok()?;
710        Some(("MD073".to_string(), value))
711    }
712
713    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
714    where
715        Self: Sized,
716    {
717        let mut rule = MD073TocValidation::default();
718        let mut indent_from_md073 = false;
719
720        if let Some(rule_config) = config.rules.get("MD073") {
721            // Parse enabled (opt-in rule, defaults to false)
722            if let Some(enabled) = rule_config.values.get("enabled").and_then(|v| v.as_bool()) {
723                rule.enabled = enabled;
724            }
725
726            // Parse min-level
727            if let Some(min_level) = rule_config.values.get("min-level").and_then(|v| v.as_integer()) {
728                rule.min_level = (min_level.clamp(1, 6)) as u8;
729            }
730
731            // Parse max-level
732            if let Some(max_level) = rule_config.values.get("max-level").and_then(|v| v.as_integer()) {
733                rule.max_level = (max_level.clamp(1, 6)) as u8;
734            }
735
736            // Parse enforce-order
737            if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(|v| v.as_bool()) {
738                rule.enforce_order = enforce_order;
739            }
740
741            // Parse indent (MD073-specific override)
742            if let Some(indent) = rule_config.values.get("indent").and_then(|v| v.as_integer()) {
743                rule.indent = (indent.clamp(1, 8)) as usize;
744                indent_from_md073 = true;
745            }
746        }
747
748        // If indent not explicitly set in MD073, read from MD007 config
749        if !indent_from_md073
750            && let Some(md007_config) = config.rules.get("MD007")
751            && let Some(indent) = md007_config.values.get("indent").and_then(|v| v.as_integer())
752        {
753            rule.indent = (indent.clamp(1, 8)) as usize;
754        }
755
756        Box::new(rule)
757    }
758}
759
760#[cfg(test)]
761mod tests {
762    use super::*;
763    use crate::config::MarkdownFlavor;
764
    /// Build a lint context for `content` using the standard markdown flavor.
    fn create_ctx(content: &str) -> LintContext<'_> {
        LintContext::new(content, MarkdownFlavor::Standard, None)
    }
768
    /// Create rule with enabled=true for tests that call check() directly.
    /// All other settings keep their defaults.
    fn create_enabled_rule() -> MD073TocValidation {
        MD073TocValidation {
            enabled: true,
            ..MD073TocValidation::default()
        }
    }
776
777    // ========== Detection Tests ==========
778
779    #[test]
780    fn test_detect_markers_basic() {
781        let rule = MD073TocValidation::new();
782        let content = r#"# Title
783
784<!-- toc -->
785
786- [Heading 1](#heading-1)
787
788<!-- tocstop -->
789
790## Heading 1
791
792Content here.
793"#;
794        let ctx = create_ctx(content);
795        let region = rule.detect_by_markers(&ctx);
796        assert!(region.is_some());
797        let region = region.unwrap();
798        // Verify region boundaries are detected correctly
799        assert_eq!(region.start_line, 4);
800        assert_eq!(region.end_line, 6);
801    }
802
803    #[test]
804    fn test_detect_markers_variations() {
805        let rule = MD073TocValidation::new();
806
807        // Test <!--toc--> (no spaces)
808        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
809        let ctx1 = create_ctx(content1);
810        assert!(rule.detect_by_markers(&ctx1).is_some());
811
812        // Test <!-- TOC --> (uppercase)
813        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
814        let ctx2 = create_ctx(content2);
815        assert!(rule.detect_by_markers(&ctx2).is_some());
816
817        // Test <!-- /toc --> (alternative stop marker)
818        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
819        let ctx3 = create_ctx(content3);
820        assert!(rule.detect_by_markers(&ctx3).is_some());
821    }
822
823    #[test]
824    fn test_no_toc_region() {
825        let rule = MD073TocValidation::new();
826        let content = r#"# Title
827
828## Heading 1
829
830Content here.
831
832## Heading 2
833
834More content.
835"#;
836        let ctx = create_ctx(content);
837        let region = rule.detect_toc_region(&ctx);
838        assert!(region.is_none());
839    }
840
841    // ========== Validation Tests ==========
842
843    #[test]
844    fn test_toc_matches_headings() {
845        let rule = create_enabled_rule();
846        let content = r#"# Title
847
848<!-- toc -->
849
850- [Heading 1](#heading-1)
851- [Heading 2](#heading-2)
852
853<!-- tocstop -->
854
855## Heading 1
856
857Content.
858
859## Heading 2
860
861More content.
862"#;
863        let ctx = create_ctx(content);
864        let result = rule.check(&ctx).unwrap();
865        assert!(result.is_empty(), "Expected no warnings for matching TOC");
866    }
867
868    #[test]
869    fn test_missing_entry() {
870        let rule = create_enabled_rule();
871        let content = r#"# Title
872
873<!-- toc -->
874
875- [Heading 1](#heading-1)
876
877<!-- tocstop -->
878
879## Heading 1
880
881Content.
882
883## Heading 2
884
885New heading not in TOC.
886"#;
887        let ctx = create_ctx(content);
888        let result = rule.check(&ctx).unwrap();
889        assert_eq!(result.len(), 1);
890        assert!(result[0].message.contains("Missing entry"));
891        assert!(result[0].message.contains("Heading 2"));
892    }
893
894    #[test]
895    fn test_stale_entry() {
896        let rule = create_enabled_rule();
897        let content = r#"# Title
898
899<!-- toc -->
900
901- [Heading 1](#heading-1)
902- [Deleted Heading](#deleted-heading)
903
904<!-- tocstop -->
905
906## Heading 1
907
908Content.
909"#;
910        let ctx = create_ctx(content);
911        let result = rule.check(&ctx).unwrap();
912        assert_eq!(result.len(), 1);
913        assert!(result[0].message.contains("Stale entry"));
914        assert!(result[0].message.contains("Deleted Heading"));
915    }
916
917    #[test]
918    fn test_text_mismatch() {
919        let rule = create_enabled_rule();
920        let content = r#"# Title
921
922<!-- toc -->
923
924- [Old Name](#heading-1)
925
926<!-- tocstop -->
927
928## Heading 1
929
930Content.
931"#;
932        let ctx = create_ctx(content);
933        let result = rule.check(&ctx).unwrap();
934        assert_eq!(result.len(), 1);
935        assert!(result[0].message.contains("Text mismatch"));
936    }
937
938    // ========== Level Filtering Tests ==========
939
940    #[test]
941    fn test_min_level_excludes_h1() {
942        let mut rule = MD073TocValidation::new();
943        rule.min_level = 2;
944
945        let content = r#"<!-- toc -->
946
947<!-- tocstop -->
948
949# Should Be Excluded
950
951## Should Be Included
952
953Content.
954"#;
955        let ctx = create_ctx(content);
956        let region = rule.detect_toc_region(&ctx).unwrap();
957        let expected = rule.build_expected_toc(&ctx, &region);
958
959        assert_eq!(expected.len(), 1);
960        assert_eq!(expected[0].text, "Should Be Included");
961    }
962
963    #[test]
964    fn test_max_level_excludes_h5_h6() {
965        let mut rule = MD073TocValidation::new();
966        rule.max_level = 4;
967
968        let content = r#"<!-- toc -->
969
970<!-- tocstop -->
971
972## Level 2
973
974### Level 3
975
976#### Level 4
977
978##### Level 5 Should Be Excluded
979
980###### Level 6 Should Be Excluded
981"#;
982        let ctx = create_ctx(content);
983        let region = rule.detect_toc_region(&ctx).unwrap();
984        let expected = rule.build_expected_toc(&ctx, &region);
985
986        assert_eq!(expected.len(), 3);
987        assert!(expected.iter().all(|e| e.level <= 4));
988    }
989
990    // ========== Fix Tests ==========
991
992    #[test]
993    fn test_fix_adds_missing_entry() {
994        let rule = MD073TocValidation::new();
995        let content = r#"# Title
996
997<!-- toc -->
998
999- [Heading 1](#heading-1)
1000
1001<!-- tocstop -->
1002
1003## Heading 1
1004
1005Content.
1006
1007## Heading 2
1008
1009New heading.
1010"#;
1011        let ctx = create_ctx(content);
1012        let fixed = rule.fix(&ctx).unwrap();
1013        assert!(fixed.contains("- [Heading 2](#heading-2)"));
1014    }
1015
1016    #[test]
1017    fn test_fix_removes_stale_entry() {
1018        let rule = MD073TocValidation::new();
1019        let content = r#"# Title
1020
1021<!-- toc -->
1022
1023- [Heading 1](#heading-1)
1024- [Deleted](#deleted)
1025
1026<!-- tocstop -->
1027
1028## Heading 1
1029
1030Content.
1031"#;
1032        let ctx = create_ctx(content);
1033        let fixed = rule.fix(&ctx).unwrap();
1034        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1035        assert!(!fixed.contains("Deleted"));
1036    }
1037
1038    #[test]
1039    fn test_fix_idempotent() {
1040        let rule = MD073TocValidation::new();
1041        let content = r#"# Title
1042
1043<!-- toc -->
1044
1045- [Heading 1](#heading-1)
1046- [Heading 2](#heading-2)
1047
1048<!-- tocstop -->
1049
1050## Heading 1
1051
1052Content.
1053
1054## Heading 2
1055
1056More.
1057"#;
1058        let ctx = create_ctx(content);
1059        let fixed1 = rule.fix(&ctx).unwrap();
1060        let ctx2 = create_ctx(&fixed1);
1061        let fixed2 = rule.fix(&ctx2).unwrap();
1062
1063        // Second fix should produce same output
1064        assert_eq!(fixed1, fixed2);
1065    }
1066
1067    #[test]
1068    fn test_fix_preserves_markers() {
1069        let rule = MD073TocValidation::new();
1070        let content = r#"# Title
1071
1072<!-- toc -->
1073
1074Old TOC content.
1075
1076<!-- tocstop -->
1077
1078## New Heading
1079
1080Content.
1081"#;
1082        let ctx = create_ctx(content);
1083        let fixed = rule.fix(&ctx).unwrap();
1084
1085        // Markers should still be present
1086        assert!(fixed.contains("<!-- toc -->"));
1087        assert!(fixed.contains("<!-- tocstop -->"));
1088        // New content should be generated
1089        assert!(fixed.contains("- [New Heading](#new-heading)"));
1090    }
1091
1092    #[test]
1093    fn test_fix_requires_markers() {
1094        let rule = create_enabled_rule();
1095
1096        // Document without markers - no TOC detected, no changes
1097        let content_no_markers = r#"# Title
1098
1099## Heading 1
1100
1101Content.
1102"#;
1103        let ctx = create_ctx(content_no_markers);
1104        let fixed = rule.fix(&ctx).unwrap();
1105        assert_eq!(fixed, content_no_markers);
1106
1107        // Document with markers - TOC detected and fixed
1108        let content_markers = r#"# Title
1109
1110<!-- toc -->
1111
1112- [Old Entry](#old-entry)
1113
1114<!-- tocstop -->
1115
1116## Heading 1
1117
1118Content.
1119"#;
1120        let ctx = create_ctx(content_markers);
1121        let fixed = rule.fix(&ctx).unwrap();
1122        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1123        assert!(!fixed.contains("Old Entry"));
1124    }
1125
1126    // ========== Anchor Tests ==========
1127
1128    #[test]
1129    fn test_duplicate_heading_anchors() {
1130        let rule = MD073TocValidation::new();
1131        let content = r#"# Title
1132
1133<!-- toc -->
1134
1135<!-- tocstop -->
1136
1137## Duplicate
1138
1139Content.
1140
1141## Duplicate
1142
1143More content.
1144
1145## Duplicate
1146
1147Even more.
1148"#;
1149        let ctx = create_ctx(content);
1150        let region = rule.detect_toc_region(&ctx).unwrap();
1151        let expected = rule.build_expected_toc(&ctx, &region);
1152
1153        assert_eq!(expected.len(), 3);
1154        assert_eq!(expected[0].anchor, "duplicate");
1155        assert_eq!(expected[1].anchor, "duplicate-1");
1156        assert_eq!(expected[2].anchor, "duplicate-2");
1157    }
1158
1159    // ========== Edge Cases ==========
1160
1161    #[test]
1162    fn test_headings_in_code_blocks_ignored() {
1163        let rule = create_enabled_rule();
1164        let content = r#"# Title
1165
1166<!-- toc -->
1167
1168- [Real Heading](#real-heading)
1169
1170<!-- tocstop -->
1171
1172## Real Heading
1173
1174```markdown
1175## Fake Heading In Code
1176```
1177
1178Content.
1179"#;
1180        let ctx = create_ctx(content);
1181        let result = rule.check(&ctx).unwrap();
1182        assert!(result.is_empty(), "Should not report fake heading in code block");
1183    }
1184
1185    #[test]
1186    fn test_empty_toc_region() {
1187        let rule = create_enabled_rule();
1188        let content = r#"# Title
1189
1190<!-- toc -->
1191<!-- tocstop -->
1192
1193## Heading 1
1194
1195Content.
1196"#;
1197        let ctx = create_ctx(content);
1198        let result = rule.check(&ctx).unwrap();
1199        assert_eq!(result.len(), 1);
1200        assert!(result[0].message.contains("Missing entry"));
1201    }
1202
1203    #[test]
1204    fn test_nested_indentation() {
1205        let rule = create_enabled_rule();
1206
1207        let content = r#"<!-- toc -->
1208
1209<!-- tocstop -->
1210
1211## Level 2
1212
1213### Level 3
1214
1215#### Level 4
1216
1217## Another Level 2
1218"#;
1219        let ctx = create_ctx(content);
1220        let region = rule.detect_toc_region(&ctx).unwrap();
1221        let expected = rule.build_expected_toc(&ctx, &region);
1222        let toc = rule.generate_toc(&expected);
1223
1224        // Check indentation (always nested)
1225        assert!(toc.contains("- [Level 2](#level-2)"));
1226        assert!(toc.contains("  - [Level 3](#level-3)"));
1227        assert!(toc.contains("    - [Level 4](#level-4)"));
1228        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1229    }
1230
1231    // ========== Indentation Mismatch Tests ==========
1232
1233    #[test]
1234    fn test_indentation_mismatch_detected() {
1235        let rule = create_enabled_rule();
1236        // TOC entries are all at same indentation level, but headings have different levels
1237        let content = r#"<!-- toc -->
1238- [Hello](#hello)
1239- [Another](#another)
1240- [Heading](#heading)
1241<!-- tocstop -->
1242
1243## Hello
1244
1245### Another
1246
1247## Heading
1248"#;
1249        let ctx = create_ctx(content);
1250        let result = rule.check(&ctx).unwrap();
1251        // Should detect indentation mismatch - "Another" is level 3 but has no indent
1252        assert_eq!(result.len(), 1, "Should report indentation mismatch: {result:?}");
1253        assert!(
1254            result[0].message.contains("Indentation mismatch"),
1255            "Message should mention indentation: {}",
1256            result[0].message
1257        );
1258        assert!(
1259            result[0].message.contains("Another"),
1260            "Message should mention the entry: {}",
1261            result[0].message
1262        );
1263    }
1264
1265    #[test]
1266    fn test_indentation_mismatch_fixed() {
1267        let rule = create_enabled_rule();
1268        // TOC entries are all at same indentation level, but headings have different levels
1269        let content = r#"<!-- toc -->
1270- [Hello](#hello)
1271- [Another](#another)
1272- [Heading](#heading)
1273<!-- tocstop -->
1274
1275## Hello
1276
1277### Another
1278
1279## Heading
1280"#;
1281        let ctx = create_ctx(content);
1282        let fixed = rule.fix(&ctx).unwrap();
1283        // After fix, "Another" should be indented
1284        assert!(fixed.contains("- [Hello](#hello)"));
1285        assert!(fixed.contains("  - [Another](#another)")); // Indented with 2 spaces
1286        assert!(fixed.contains("- [Heading](#heading)"));
1287    }
1288
1289    #[test]
1290    fn test_no_indentation_mismatch_when_correct() {
1291        let rule = create_enabled_rule();
1292        // TOC has correct indentation
1293        let content = r#"<!-- toc -->
1294- [Hello](#hello)
1295  - [Another](#another)
1296- [Heading](#heading)
1297<!-- tocstop -->
1298
1299## Hello
1300
1301### Another
1302
1303## Heading
1304"#;
1305        let ctx = create_ctx(content);
1306        let result = rule.check(&ctx).unwrap();
1307        // Should not report any issues - indentation is correct
1308        assert!(result.is_empty(), "Should not report issues: {result:?}");
1309    }
1310
1311    // ========== Order Mismatch Tests ==========
1312
1313    #[test]
1314    fn test_order_mismatch_detected() {
1315        let rule = create_enabled_rule();
1316        let content = r#"# Title
1317
1318<!-- toc -->
1319
1320- [Section B](#section-b)
1321- [Section A](#section-a)
1322
1323<!-- tocstop -->
1324
1325## Section A
1326
1327Content A.
1328
1329## Section B
1330
1331Content B.
1332"#;
1333        let ctx = create_ctx(content);
1334        let result = rule.check(&ctx).unwrap();
1335        // Should detect order mismatch - Section B appears before Section A in TOC
1336        // but Section A comes first in document
1337        assert!(!result.is_empty(), "Should detect order mismatch");
1338    }
1339
1340    #[test]
1341    fn test_order_mismatch_ignored_when_disabled() {
1342        let mut rule = create_enabled_rule();
1343        rule.enforce_order = false;
1344        let content = r#"# Title
1345
1346<!-- toc -->
1347
1348- [Section B](#section-b)
1349- [Section A](#section-a)
1350
1351<!-- tocstop -->
1352
1353## Section A
1354
1355Content A.
1356
1357## Section B
1358
1359Content B.
1360"#;
1361        let ctx = create_ctx(content);
1362        let result = rule.check(&ctx).unwrap();
1363        // With enforce_order=false, order mismatches should be ignored
1364        assert!(result.is_empty(), "Should not report order mismatch when disabled");
1365    }
1366
1367    // ========== Unicode and Special Characters Tests ==========
1368
1369    #[test]
1370    fn test_unicode_headings() {
1371        let rule = create_enabled_rule();
1372        let content = r#"# Title
1373
1374<!-- toc -->
1375
1376- [日本語の見出し](#日本語の見出し)
1377- [Émojis 🎉](#émojis-)
1378
1379<!-- tocstop -->
1380
1381## 日本語の見出し
1382
1383Japanese content.
1384
1385## Émojis 🎉
1386
1387Content with emojis.
1388"#;
1389        let ctx = create_ctx(content);
1390        let result = rule.check(&ctx).unwrap();
1391        // Should handle unicode correctly
1392        assert!(result.is_empty(), "Should handle unicode headings");
1393    }
1394
1395    #[test]
1396    fn test_special_characters_in_headings() {
1397        let rule = create_enabled_rule();
1398        let content = r#"# Title
1399
1400<!-- toc -->
1401
1402- [What's New?](#whats-new)
1403- [C++ Guide](#c-guide)
1404
1405<!-- tocstop -->
1406
1407## What's New?
1408
1409News content.
1410
1411## C++ Guide
1412
1413C++ content.
1414"#;
1415        let ctx = create_ctx(content);
1416        let result = rule.check(&ctx).unwrap();
1417        assert!(result.is_empty(), "Should handle special characters");
1418    }
1419
1420    #[test]
1421    fn test_code_spans_in_headings() {
1422        let rule = create_enabled_rule();
1423        let content = r#"# Title
1424
1425<!-- toc -->
1426
1427- [`check [PATHS...]`](#check-paths)
1428
1429<!-- tocstop -->
1430
1431## `check [PATHS...]`
1432
1433Command documentation.
1434"#;
1435        let ctx = create_ctx(content);
1436        let result = rule.check(&ctx).unwrap();
1437        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
1438    }
1439
1440    // ========== Config Tests ==========
1441
1442    #[test]
1443    fn test_from_config_defaults() {
1444        let config = crate::config::Config::default();
1445        let rule = MD073TocValidation::from_config(&config);
1446        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1447
1448        assert_eq!(rule.min_level, 2);
1449        assert_eq!(rule.max_level, 4);
1450        assert!(rule.enforce_order);
1451        assert_eq!(rule.indent, 2);
1452    }
1453
1454    #[test]
1455    fn test_indent_from_md007_config() {
1456        use crate::config::{Config, RuleConfig};
1457        use std::collections::BTreeMap;
1458
1459        let mut config = Config::default();
1460
1461        // Set MD007 indent to 4
1462        let mut md007_values = BTreeMap::new();
1463        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1464        config.rules.insert(
1465            "MD007".to_string(),
1466            RuleConfig {
1467                severity: None,
1468                values: md007_values,
1469            },
1470        );
1471
1472        let rule = MD073TocValidation::from_config(&config);
1473        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1474
1475        assert_eq!(rule.indent, 4, "Should read indent from MD007 config");
1476    }
1477
1478    #[test]
1479    fn test_indent_md073_overrides_md007() {
1480        use crate::config::{Config, RuleConfig};
1481        use std::collections::BTreeMap;
1482
1483        let mut config = Config::default();
1484
1485        // Set MD007 indent to 4
1486        let mut md007_values = BTreeMap::new();
1487        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1488        config.rules.insert(
1489            "MD007".to_string(),
1490            RuleConfig {
1491                severity: None,
1492                values: md007_values,
1493            },
1494        );
1495
1496        // Set MD073 indent to 3 (should override MD007)
1497        let mut md073_values = BTreeMap::new();
1498        md073_values.insert("enabled".to_string(), toml::Value::Boolean(true));
1499        md073_values.insert("indent".to_string(), toml::Value::Integer(3));
1500        config.rules.insert(
1501            "MD073".to_string(),
1502            RuleConfig {
1503                severity: None,
1504                values: md073_values,
1505            },
1506        );
1507
1508        let rule = MD073TocValidation::from_config(&config);
1509        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1510
1511        assert_eq!(rule.indent, 3, "MD073 indent should override MD007");
1512    }
1513
1514    #[test]
1515    fn test_generate_toc_with_4_space_indent() {
1516        let mut rule = create_enabled_rule();
1517        rule.indent = 4;
1518
1519        let content = r#"<!-- toc -->
1520
1521<!-- tocstop -->
1522
1523## Level 2
1524
1525### Level 3
1526
1527#### Level 4
1528
1529## Another Level 2
1530"#;
1531        let ctx = create_ctx(content);
1532        let region = rule.detect_toc_region(&ctx).unwrap();
1533        let expected = rule.build_expected_toc(&ctx, &region);
1534        let toc = rule.generate_toc(&expected);
1535
1536        // With 4-space indent:
1537        // Level 2 = 0 spaces (base level)
1538        // Level 3 = 4 spaces
1539        // Level 4 = 8 spaces
1540        assert!(toc.contains("- [Level 2](#level-2)"), "Level 2 should have no indent");
1541        assert!(
1542            toc.contains("    - [Level 3](#level-3)"),
1543            "Level 3 should have 4-space indent"
1544        );
1545        assert!(
1546            toc.contains("        - [Level 4](#level-4)"),
1547            "Level 4 should have 8-space indent"
1548        );
1549        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1550    }
1551
1552    #[test]
1553    fn test_validate_toc_with_4_space_indent() {
1554        let mut rule = create_enabled_rule();
1555        rule.indent = 4;
1556
1557        // TOC with correct 4-space indentation
1558        let content = r#"<!-- toc -->
1559- [Hello](#hello)
1560    - [Another](#another)
1561- [Heading](#heading)
1562<!-- tocstop -->
1563
1564## Hello
1565
1566### Another
1567
1568## Heading
1569"#;
1570        let ctx = create_ctx(content);
1571        let result = rule.check(&ctx).unwrap();
1572        assert!(
1573            result.is_empty(),
1574            "Should accept 4-space indent when configured: {result:?}"
1575        );
1576    }
1577
1578    #[test]
1579    fn test_validate_toc_wrong_indent_with_4_space_config() {
1580        let mut rule = create_enabled_rule();
1581        rule.indent = 4;
1582
1583        // TOC with 2-space indentation (wrong when 4-space is configured)
1584        let content = r#"<!-- toc -->
1585- [Hello](#hello)
1586  - [Another](#another)
1587- [Heading](#heading)
1588<!-- tocstop -->
1589
1590## Hello
1591
1592### Another
1593
1594## Heading
1595"#;
1596        let ctx = create_ctx(content);
1597        let result = rule.check(&ctx).unwrap();
1598        assert_eq!(result.len(), 1, "Should detect wrong indent");
1599        assert!(
1600            result[0].message.contains("Indentation mismatch"),
1601            "Should report indentation mismatch: {}",
1602            result[0].message
1603        );
1604        assert!(
1605            result[0].message.contains("expected 4 spaces"),
1606            "Should mention expected 4 spaces: {}",
1607            result[0].message
1608        );
1609    }
1610
1611    // ========== Markdown Stripping Tests ==========
1612
1613    #[test]
1614    fn test_strip_markdown_formatting_link() {
1615        let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1616        assert_eq!(result, "Tool: terminal");
1617    }
1618
1619    #[test]
1620    fn test_strip_markdown_formatting_bold() {
1621        let result = strip_markdown_formatting("This is **bold** text");
1622        assert_eq!(result, "This is bold text");
1623
1624        let result = strip_markdown_formatting("This is __bold__ text");
1625        assert_eq!(result, "This is bold text");
1626    }
1627
1628    #[test]
1629    fn test_strip_markdown_formatting_italic() {
1630        let result = strip_markdown_formatting("This is *italic* text");
1631        assert_eq!(result, "This is italic text");
1632
1633        let result = strip_markdown_formatting("This is _italic_ text");
1634        assert_eq!(result, "This is italic text");
1635    }
1636
1637    #[test]
1638    fn test_strip_markdown_formatting_code_span() {
1639        let result = strip_markdown_formatting("Use the `format` function");
1640        assert_eq!(result, "Use the format function");
1641    }
1642
1643    #[test]
1644    fn test_strip_markdown_formatting_image() {
1645        let result = strip_markdown_formatting("See ![logo](image.png) for details");
1646        assert_eq!(result, "See logo for details");
1647    }
1648
1649    #[test]
1650    fn test_strip_markdown_formatting_reference_link() {
1651        let result = strip_markdown_formatting("See [documentation][docs] for details");
1652        assert_eq!(result, "See documentation for details");
1653    }
1654
1655    #[test]
1656    fn test_strip_markdown_formatting_combined() {
1657        // Link is stripped first, leaving bold, then bold is stripped
1658        let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1659        assert_eq!(result, "Tool: terminal");
1660    }
1661
1662    #[test]
1663    fn test_toc_with_link_in_heading_matches_stripped_text() {
1664        let rule = create_enabled_rule();
1665
1666        // TOC entry text matches the stripped heading text
1667        let content = r#"# Title
1668
1669<!-- toc -->
1670
1671- [Tool: terminal](#tool-terminal)
1672
1673<!-- tocstop -->
1674
1675## Tool: [terminal](https://example.com)
1676
1677Content here.
1678"#;
1679        let ctx = create_ctx(content);
1680        let result = rule.check(&ctx).unwrap();
1681        assert!(
1682            result.is_empty(),
1683            "Stripped heading text should match TOC entry: {result:?}"
1684        );
1685    }
1686
1687    #[test]
1688    fn test_toc_with_simplified_text_still_mismatches() {
1689        let rule = create_enabled_rule();
1690
1691        // TOC entry "terminal" does NOT match stripped heading "Tool: terminal"
1692        let content = r#"# Title
1693
1694<!-- toc -->
1695
1696- [terminal](#tool-terminal)
1697
1698<!-- tocstop -->
1699
1700## Tool: [terminal](https://example.com)
1701
1702Content here.
1703"#;
1704        let ctx = create_ctx(content);
1705        let result = rule.check(&ctx).unwrap();
1706        assert_eq!(result.len(), 1, "Should report text mismatch");
1707        assert!(result[0].message.contains("Text mismatch"));
1708    }
1709
1710    #[test]
1711    fn test_fix_generates_stripped_toc_entries() {
1712        let rule = MD073TocValidation::new();
1713        let content = r#"# Title
1714
1715<!-- toc -->
1716
1717<!-- tocstop -->
1718
1719## Tool: [busybox](https://www.busybox.net/)
1720
1721Content.
1722
1723## Tool: [mount](https://en.wikipedia.org/wiki/Mount)
1724
1725More content.
1726"#;
1727        let ctx = create_ctx(content);
1728        let fixed = rule.fix(&ctx).unwrap();
1729
1730        // Generated TOC should have stripped text (links removed)
1731        assert!(
1732            fixed.contains("- [Tool: busybox](#tool-busybox)"),
1733            "TOC entry should have stripped link text"
1734        );
1735        assert!(
1736            fixed.contains("- [Tool: mount](#tool-mount)"),
1737            "TOC entry should have stripped link text"
1738        );
1739        // TOC entries should NOT contain the URL (the actual headings in the document still will)
1740        // Check only within the TOC region (between toc markers)
1741        let toc_start = fixed.find("<!-- toc -->").unwrap();
1742        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
1743        let toc_content = &fixed[toc_start..toc_end];
1744        assert!(
1745            !toc_content.contains("busybox.net"),
1746            "TOC should not contain URLs: {toc_content}"
1747        );
1748        assert!(
1749            !toc_content.contains("wikipedia.org"),
1750            "TOC should not contain URLs: {toc_content}"
1751        );
1752    }
1753
1754    #[test]
1755    fn test_fix_with_bold_in_heading() {
1756        let rule = MD073TocValidation::new();
1757        let content = r#"# Title
1758
1759<!-- toc -->
1760
1761<!-- tocstop -->
1762
1763## **Important** Section
1764
1765Content.
1766"#;
1767        let ctx = create_ctx(content);
1768        let fixed = rule.fix(&ctx).unwrap();
1769
1770        // Generated TOC should have stripped text (bold markers removed)
1771        assert!(fixed.contains("- [Important Section](#important-section)"));
1772    }
1773
1774    #[test]
1775    fn test_fix_with_code_in_heading() {
1776        let rule = MD073TocValidation::new();
1777        let content = r#"# Title
1778
1779<!-- toc -->
1780
1781<!-- tocstop -->
1782
1783## Using `async` Functions
1784
1785Content.
1786"#;
1787        let ctx = create_ctx(content);
1788        let fixed = rule.fix(&ctx).unwrap();
1789
1790        // Generated TOC should have stripped text (backticks removed)
1791        assert!(fixed.contains("- [Using async Functions](#using-async-functions)"));
1792    }
1793
1794    // ========== Custom Anchor Tests ==========
1795
1796    #[test]
1797    fn test_custom_anchor_id_respected() {
1798        let rule = create_enabled_rule();
1799        let content = r#"# Title
1800
1801<!-- toc -->
1802
1803- [My Section](#my-custom-anchor)
1804
1805<!-- tocstop -->
1806
1807## My Section {#my-custom-anchor}
1808
1809Content here.
1810"#;
1811        let ctx = create_ctx(content);
1812        let result = rule.check(&ctx).unwrap();
1813        assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
1814    }
1815
1816    #[test]
1817    fn test_custom_anchor_id_in_generated_toc() {
1818        let rule = create_enabled_rule();
1819        let content = r#"# Title
1820
1821<!-- toc -->
1822
1823<!-- tocstop -->
1824
1825## First Section {#custom-first}
1826
1827Content.
1828
1829## Second Section {#another-custom}
1830
1831More content.
1832"#;
1833        let ctx = create_ctx(content);
1834        let fixed = rule.fix(&ctx).unwrap();
1835        assert!(fixed.contains("- [First Section](#custom-first)"));
1836        assert!(fixed.contains("- [Second Section](#another-custom)"));
1837    }
1838
1839    #[test]
1840    fn test_mixed_custom_and_generated_anchors() {
1841        let rule = create_enabled_rule();
1842        let content = r#"# Title
1843
1844<!-- toc -->
1845
1846- [Custom Section](#my-id)
1847- [Normal Section](#normal-section)
1848
1849<!-- tocstop -->
1850
1851## Custom Section {#my-id}
1852
1853Content.
1854
1855## Normal Section
1856
1857More content.
1858"#;
1859        let ctx = create_ctx(content);
1860        let result = rule.check(&ctx).unwrap();
1861        assert!(result.is_empty(), "Should handle mixed custom and generated anchors");
1862    }
1863
1864    // ========== Anchor Generation Tests ==========
1865
1866    #[test]
1867    fn test_github_anchor_style() {
1868        let rule = create_enabled_rule();
1869
1870        let content = r#"<!-- toc -->
1871
1872<!-- tocstop -->
1873
1874## Test_With_Underscores
1875
1876Content.
1877"#;
1878        let ctx = create_ctx(content);
1879        let region = rule.detect_toc_region(&ctx).unwrap();
1880        let expected = rule.build_expected_toc(&ctx, &region);
1881
1882        // GitHub-style anchors preserve underscores
1883        assert_eq!(expected[0].anchor, "test_with_underscores");
1884    }
1885
1886    // ========== Stress Tests ==========
1887
1888    #[test]
1889    fn test_stress_many_headings() {
1890        let rule = create_enabled_rule();
1891
1892        // Generate a document with 150 headings
1893        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1894
1895        for i in 1..=150 {
1896            content.push_str(&format!("## Heading Number {i}\n\nContent for section {i}.\n\n"));
1897        }
1898
1899        let ctx = create_ctx(&content);
1900
1901        // Should not panic or timeout
1902        let result = rule.check(&ctx).unwrap();
1903
1904        // Should report missing entries for all 150 headings
1905        assert_eq!(result.len(), 1, "Should report single warning for TOC");
1906        assert!(result[0].message.contains("Missing entry"));
1907
1908        // Fix should generate TOC with 150 entries
1909        let fixed = rule.fix(&ctx).unwrap();
1910        assert!(fixed.contains("- [Heading Number 1](#heading-number-1)"));
1911        assert!(fixed.contains("- [Heading Number 100](#heading-number-100)"));
1912        assert!(fixed.contains("- [Heading Number 150](#heading-number-150)"));
1913    }
1914
1915    #[test]
1916    fn test_stress_deeply_nested() {
1917        let rule = create_enabled_rule();
1918        let content = r#"# Title
1919
1920<!-- toc -->
1921
1922<!-- tocstop -->
1923
1924## Level 2 A
1925
1926### Level 3 A
1927
1928#### Level 4 A
1929
1930## Level 2 B
1931
1932### Level 3 B
1933
1934#### Level 4 B
1935
1936## Level 2 C
1937
1938### Level 3 C
1939
1940#### Level 4 C
1941
1942## Level 2 D
1943
1944### Level 3 D
1945
1946#### Level 4 D
1947"#;
1948        let ctx = create_ctx(content);
1949        let fixed = rule.fix(&ctx).unwrap();
1950
1951        // Check nested indentation is correct
1952        assert!(fixed.contains("- [Level 2 A](#level-2-a)"));
1953        assert!(fixed.contains("  - [Level 3 A](#level-3-a)"));
1954        assert!(fixed.contains("    - [Level 4 A](#level-4-a)"));
1955        assert!(fixed.contains("- [Level 2 D](#level-2-d)"));
1956        assert!(fixed.contains("  - [Level 3 D](#level-3-d)"));
1957        assert!(fixed.contains("    - [Level 4 D](#level-4-d)"));
1958    }
1959
1960    // ==================== Duplicate TOC anchors ====================
1961
1962    #[test]
1963    fn test_duplicate_toc_anchors_produce_correct_diagnostics() {
1964        let rule = create_enabled_rule();
1965        // Document has headings "Example", "Another", "Example" which produce anchors:
1966        // "example", "another", "example-1"
1967        // TOC incorrectly uses #example twice instead of #example and #example-1
1968        let content = r#"# Document
1969
1970<!-- toc -->
1971
1972- [Example](#example)
1973- [Another](#another)
1974- [Example](#example)
1975
1976<!-- tocstop -->
1977
1978## Example
1979First.
1980
1981## Another
1982Middle.
1983
1984## Example
1985Second.
1986"#;
1987        let ctx = create_ctx(content);
1988        let result = rule.check(&ctx).unwrap();
1989
1990        // The TOC has #example twice but expected has #example and #example-1.
1991        // Should report that #example-1 is missing from the TOC.
1992        assert!(!result.is_empty(), "Should detect mismatch with duplicate TOC anchors");
1993        assert!(
1994            result[0].message.contains("Missing entry") || result[0].message.contains("Stale entry"),
1995            "Should report missing or stale entries for duplicate anchors. Got: {}",
1996            result[0].message
1997        );
1998    }
1999
2000    // ==================== Multi-backtick code spans ====================
2001
2002    #[test]
2003    fn test_strip_double_backtick_code_span() {
2004        // Double-backtick code spans should be stripped
2005        let result = strip_markdown_formatting("Using ``code with ` backtick``");
2006        assert_eq!(
2007            result, "Using code with ` backtick",
2008            "Should strip double-backtick code spans"
2009        );
2010    }
2011
2012    #[test]
2013    fn test_strip_triple_backtick_code_span() {
2014        // Triple-backtick code spans should be stripped
2015        let result = strip_markdown_formatting("Using ```code with `` backticks```");
2016        assert_eq!(
2017            result, "Using code with `` backticks",
2018            "Should strip triple-backtick code spans"
2019        );
2020    }
2021
2022    #[test]
2023    fn test_toc_with_double_backtick_heading() {
2024        let rule = create_enabled_rule();
2025        let content = r#"# Title
2026
2027<!-- toc -->
2028
2029- [Using code with backtick](#using-code-with-backtick)
2030
2031<!-- tocstop -->
2032
2033## Using ``code with ` backtick``
2034
2035Content here.
2036"#;
2037        let ctx = create_ctx(content);
2038        // The heading uses double-backtick code span: ``code with ` backtick``
2039        // After stripping, heading text = "Using code with ` backtick"
2040        // The fix should produce a TOC entry with the stripped text
2041        let fixed = rule.fix(&ctx).unwrap();
2042        // The generated TOC should have the stripped heading text
2043        assert!(
2044            fixed.contains("code with ` backtick") || fixed.contains("code with backtick"),
2045            "Fix should strip double-backtick code span from heading. Got TOC: {}",
2046            &fixed[fixed.find("<!-- toc -->").unwrap()..fixed.find("<!-- tocstop -->").unwrap()]
2047        );
2048    }
2049
2050    #[test]
2051    fn test_stress_many_duplicates() {
2052        let rule = create_enabled_rule();
2053
2054        // Generate 50 headings with the same text
2055        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
2056        for _ in 0..50 {
2057            content.push_str("## FAQ\n\nContent.\n\n");
2058        }
2059
2060        let ctx = create_ctx(&content);
2061        let region = rule.detect_toc_region(&ctx).unwrap();
2062        let expected = rule.build_expected_toc(&ctx, &region);
2063
2064        // Should generate unique anchors for all 50
2065        assert_eq!(expected.len(), 50);
2066        assert_eq!(expected[0].anchor, "faq");
2067        assert_eq!(expected[1].anchor, "faq-1");
2068        assert_eq!(expected[49].anchor, "faq-49");
2069    }
2070}