Skip to main content

rumdl_lib/rules/
md073_toc_validation.rs

1//! MD073: Table of Contents validation rule
2//!
3//! Validates that TOC sections match the actual document headings.
4
5use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
12/// Regex for TOC start marker: `<!-- toc -->` with optional whitespace variations
13static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());
14
15/// Regex for TOC stop marker: `<!-- tocstop -->` or `<!-- /toc -->`
16static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());
17
18/// Regex for extracting TOC entries: `- [text](#anchor)` or `* [text](#anchor)`
19/// with optional leading whitespace for nested items
20/// Handles nested brackets like `[`check [PATHS...]`](#check-paths)`
21static TOC_ENTRY_PATTERN: LazyLock<Regex> =
22    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// Location of the TOC body, i.e. the lines between the start and stop markers.
#[derive(Clone, Debug)]
struct TocRegion {
    /// First line of the TOC body (1-indexed): the line right after the start marker
    start_line: usize,
    /// Last line of the TOC body (1-indexed): the line right before the stop marker
    end_line: usize,
    /// Byte offset at which the TOC body begins
    content_start: usize,
    /// Byte offset at which the TOC body ends (exclusive)
    content_end: usize,
}
36
/// One list item read from the TOC that is currently in the document.
#[derive(Clone, Debug)]
struct TocEntry {
    /// Link text shown for the entry
    text: String,
    /// Link target, stored without the leading `#`
    anchor: String,
    /// Length of the leading whitespace in front of the list marker
    /// (compared against the expected nesting indentation)
    indent_spaces: usize,
}
47
/// The TOC entry a document heading should produce.
#[derive(Clone, Debug)]
struct ExpectedTocEntry {
    /// Line on which the heading appears (1-indexed)
    heading_line: usize,
    /// Heading depth, from 1 (`#`) to 6 (`######`)
    level: u8,
    /// Heading text as written in the document (used for display)
    text: String,
    /// Anchor the entry must link to
    anchor: String,
}
60
61/// Types of mismatches between actual and expected TOC
62#[derive(Debug)]
63enum TocMismatch {
64    /// Entry exists in TOC but heading doesn't exist
65    StaleEntry { entry: TocEntry },
66    /// Heading exists but no TOC entry for it
67    MissingEntry { expected: ExpectedTocEntry },
68    /// TOC entry text doesn't match heading text
69    TextMismatch {
70        entry: TocEntry,
71        expected: ExpectedTocEntry,
72    },
73    /// TOC entries are in wrong order
74    OrderMismatch { entry: TocEntry, expected_position: usize },
75    /// TOC entry has wrong indentation level
76    IndentationMismatch {
77        entry: TocEntry,
78        actual_indent: usize,
79        expected_indent: usize,
80    },
81}
82
83/// Regex patterns for stripping markdown formatting from heading text
84static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
85static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
86static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
87static MARKDOWN_CODE_SPAN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`([^`]+)`").unwrap());
88static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
89static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
90static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
91// Match underscore italic at word boundaries (space or start/end)
92// Handles: "_text_", " _text_ ", "start _text_", "_text_ end"
93static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> =
94    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
95
96/// Strip markdown formatting from text, preserving plain text content.
97/// Used for TOC entry display text.
98///
99/// Examples:
100/// - `[terminal](url)` → `terminal`
101/// - `**bold**` → `bold`
102/// - `` `code` `` → `code`
103/// - `Tool: [terminal](url)` → `Tool: terminal`
104fn strip_markdown_formatting(text: &str) -> String {
105    let mut result = text.to_string();
106
107    // Strip images first (before links, since images use similar syntax)
108    result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
109
110    // Strip links: [text](url) → text
111    result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
112
113    // Strip reference links: [text][ref] → text
114    result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
115
116    // Strip code spans: `code` → code
117    result = MARKDOWN_CODE_SPAN.replace_all(&result, "$1").to_string();
118
119    // Strip bold (do double before single to handle nested)
120    result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
121    result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
122
123    // Strip italic
124    result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
125    // Underscore italic: preserve boundary chars, extract content
126    result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
127
128    result
129}
130
/// MD073: Table of Contents Validation
///
/// Checks that a table of contents delimited by markers
/// (`<!-- toc -->` … `<!-- tocstop -->`) agrees with the headings of the
/// document it lives in.
///
/// A document opts into validation simply by containing the markers:
/// ```markdown
/// <!-- toc -->
/// - [Section](#section)
/// <!-- tocstop -->
/// ```
///
/// ## Configuration
///
/// ```toml
/// [MD073]
/// # Enable the rule (opt-in, disabled by default)
/// enabled = true
/// # Minimum heading level to include (default: 2)
/// min-level = 2
/// # Maximum heading level to include (default: 4)
/// max-level = 4
/// # Whether TOC order must match document order (default: true)
/// enforce-order = true
/// # Indent size per nesting level (default: from MD007 config, or 2)
/// indent = 2
/// ```
#[derive(Clone)]
pub struct MD073TocValidation {
    /// `true` when the rule is active; it is opt-in, so this defaults to `false`
    enabled: bool,
    /// Shallowest heading level that gets a TOC entry
    min_level: u8,
    /// Deepest heading level that gets a TOC entry
    max_level: u8,
    /// When `true`, TOC entries must appear in document order
    enforce_order: bool,
    /// Number of spaces per nesting level (taken from the MD007 config unless
    /// MD073 sets its own value)
    pub indent: usize,
}
171
172impl Default for MD073TocValidation {
173    fn default() -> Self {
174        Self {
175            enabled: false, // Disabled by default - opt-in rule
176            min_level: 2,
177            max_level: 4,
178            enforce_order: true,
179            indent: 2, // Default indent, can be overridden by MD007 config
180        }
181    }
182}
183
184impl std::fmt::Debug for MD073TocValidation {
185    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
186        f.debug_struct("MD073TocValidation")
187            .field("enabled", &self.enabled)
188            .field("min_level", &self.min_level)
189            .field("max_level", &self.max_level)
190            .field("enforce_order", &self.enforce_order)
191            .field("indent", &self.indent)
192            .finish()
193    }
194}
195
196impl MD073TocValidation {
197    /// Create a new rule with default settings
198    pub fn new() -> Self {
199        Self::default()
200    }
201
202    /// Detect TOC region using markers
203    fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
204        let mut start_line = None;
205        let mut start_byte = None;
206
207        for (idx, line_info) in ctx.lines.iter().enumerate() {
208            let line_num = idx + 1;
209            let content = line_info.content(ctx.content);
210
211            // Skip if in code block or front matter
212            if line_info.in_code_block || line_info.in_front_matter {
213                continue;
214            }
215
216            // Look for start marker or stop marker
217            if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
218                // We have a start, now look for stop marker
219                if TOC_STOP_MARKER.is_match(content) {
220                    let end_line = line_num - 1;
221                    let content_end = line_info.byte_offset;
222
223                    // Handle case where there's no content between markers
224                    if end_line < s_line {
225                        return Some(TocRegion {
226                            start_line: s_line,
227                            end_line: s_line,
228                            content_start: s_byte,
229                            content_end: s_byte,
230                        });
231                    }
232
233                    return Some(TocRegion {
234                        start_line: s_line,
235                        end_line,
236                        content_start: s_byte,
237                        content_end,
238                    });
239                }
240            } else if TOC_START_MARKER.is_match(content) {
241                // TOC content starts on the next line
242                if idx + 1 < ctx.lines.len() {
243                    start_line = Some(line_num + 1);
244                    start_byte = Some(ctx.lines[idx + 1].byte_offset);
245                }
246            }
247        }
248
249        None
250    }
251
252    /// Detect TOC region using markers
253    fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
254        self.detect_by_markers(ctx)
255    }
256
257    /// Extract TOC entries from the detected region
258    fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
259        let mut entries = Vec::new();
260
261        for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
262            let line_info = &ctx.lines[idx];
263            let content = line_info.content(ctx.content);
264
265            if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
266                let indent_spaces = caps.get(1).map_or(0, |m| m.as_str().len());
267                let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
268                let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
269
270                entries.push(TocEntry {
271                    text,
272                    anchor,
273                    indent_spaces,
274                });
275            }
276        }
277
278        entries
279    }
280
281    /// Build expected TOC entries from document headings
282    fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
283        let mut entries = Vec::new();
284        let mut fragment_counts: HashMap<String, usize> = HashMap::new();
285
286        for (idx, line_info) in ctx.lines.iter().enumerate() {
287            let line_num = idx + 1;
288
289            // Skip headings before/within the TOC region
290            if line_num <= toc_region.end_line {
291                // Also skip the TOC heading itself for heading-based detection
292                continue;
293            }
294
295            // Skip code blocks, front matter, HTML blocks
296            if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
297                continue;
298            }
299
300            if let Some(heading) = &line_info.heading {
301                // Filter by min/max level
302                if heading.level < self.min_level || heading.level > self.max_level {
303                    continue;
304                }
305
306                // Use custom ID if available, otherwise generate GitHub-style anchor
307                let base_anchor = if let Some(custom_id) = &heading.custom_id {
308                    custom_id.clone()
309                } else {
310                    AnchorStyle::GitHub.generate_fragment(&heading.text)
311                };
312
313                // Handle duplicate anchors
314                let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
315                    let suffix = *count;
316                    *count += 1;
317                    format!("{base_anchor}-{suffix}")
318                } else {
319                    fragment_counts.insert(base_anchor.clone(), 1);
320                    base_anchor
321                };
322
323                entries.push(ExpectedTocEntry {
324                    heading_line: line_num,
325                    level: heading.level,
326                    text: heading.text.clone(),
327                    anchor,
328                });
329            }
330        }
331
332        entries
333    }
334
335    /// Compare actual TOC entries against expected and find mismatches
336    fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
337        let mut mismatches = Vec::new();
338
339        // Build a map of expected anchors
340        let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
341            expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
342
343        // Build a map of actual anchors
344        let actual_anchors: HashMap<&str, &TocEntry> = actual.iter().map(|e| (e.anchor.as_str(), e)).collect();
345
346        // Check for stale entries (in TOC but not in expected)
347        for entry in actual {
348            if !expected_anchors.contains_key(entry.anchor.as_str()) {
349                mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
350            }
351        }
352
353        // Check for missing entries (in expected but not in TOC)
354        for exp in expected {
355            if !actual_anchors.contains_key(exp.anchor.as_str()) {
356                mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
357            }
358        }
359
360        // Check for text mismatches (compare stripped versions)
361        for entry in actual {
362            if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
363                // Compare stripped text (removes markdown formatting like links, emphasis)
364                let actual_stripped = strip_markdown_formatting(entry.text.trim());
365                let expected_stripped = strip_markdown_formatting(exp.text.trim());
366                if actual_stripped != expected_stripped {
367                    mismatches.push(TocMismatch::TextMismatch {
368                        entry: entry.clone(),
369                        expected: (*exp).clone(),
370                    });
371                }
372            }
373        }
374
375        // Check for indentation mismatches
376        // Expected indentation is indent spaces per level difference from base level
377        if !expected.is_empty() {
378            let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
379
380            for entry in actual {
381                if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
382                    let level_diff = exp.level.saturating_sub(base_level) as usize;
383                    let expected_indent = level_diff * self.indent;
384
385                    if entry.indent_spaces != expected_indent {
386                        // Don't report indentation mismatch if already reported as text mismatch
387                        let already_reported = mismatches.iter().any(|m| match m {
388                            TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
389                            TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
390                            _ => false,
391                        });
392                        if !already_reported {
393                            mismatches.push(TocMismatch::IndentationMismatch {
394                                entry: entry.clone(),
395                                actual_indent: entry.indent_spaces,
396                                expected_indent,
397                            });
398                        }
399                    }
400                }
401            }
402        }
403
404        // Check order if enforce_order is enabled
405        if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
406            let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
407
408            // Find entries that exist in both but are out of order
409            let mut expected_idx = 0;
410            for entry in actual {
411                // Skip entries that don't exist in expected
412                if !expected_anchors.contains_key(entry.anchor.as_str()) {
413                    continue;
414                }
415
416                // Find where this anchor should be
417                while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
418                    expected_idx += 1;
419                }
420
421                if expected_idx >= expected_order.len() {
422                    // This entry is after where it should be
423                    let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
424                    // Only add order mismatch if not already reported as stale/text mismatch
425                    let already_reported = mismatches.iter().any(|m| match m {
426                        TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
427                        TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
428                        _ => false,
429                    });
430                    if !already_reported {
431                        mismatches.push(TocMismatch::OrderMismatch {
432                            entry: entry.clone(),
433                            expected_position: correct_pos + 1,
434                        });
435                    }
436                } else {
437                    expected_idx += 1;
438                }
439            }
440        }
441
442        mismatches
443    }
444
445    /// Generate a new TOC from expected entries (always uses nested indentation)
446    fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
447        if expected.is_empty() {
448            return String::new();
449        }
450
451        let mut result = String::new();
452        let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
453        let indent_str = " ".repeat(self.indent);
454
455        for entry in expected {
456            let level_diff = entry.level.saturating_sub(base_level) as usize;
457            let indent = indent_str.repeat(level_diff);
458
459            // Strip markdown formatting from heading text for clean TOC entries
460            let display_text = strip_markdown_formatting(&entry.text);
461            result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
462        }
463
464        result
465    }
466}
467
468impl Rule for MD073TocValidation {
469    fn name(&self) -> &'static str {
470        "MD073"
471    }
472
473    fn description(&self) -> &'static str {
474        "Table of Contents should match document headings"
475    }
476
477    fn should_skip(&self, ctx: &LintContext) -> bool {
478        // Skip if rule is disabled (opt-in rule)
479        if !self.enabled {
480            return true;
481        }
482
483        // Quick check: skip if no TOC markers
484        let has_toc_marker = ctx.content.contains("<!-- toc") || ctx.content.contains("<!--toc");
485        !has_toc_marker
486    }
487
488    fn check(&self, ctx: &LintContext) -> LintResult {
489        let mut warnings = Vec::new();
490
491        // Detect TOC region
492        let Some(region) = self.detect_toc_region(ctx) else {
493            // No TOC found - nothing to validate
494            return Ok(warnings);
495        };
496
497        // Extract actual TOC entries
498        let actual_entries = self.extract_toc_entries(ctx, &region);
499
500        // Build expected TOC from headings
501        let expected_entries = self.build_expected_toc(ctx, &region);
502
503        // If no expected entries and no actual entries, nothing to validate
504        if expected_entries.is_empty() && actual_entries.is_empty() {
505            return Ok(warnings);
506        }
507
508        // Validate
509        let mismatches = self.validate_toc(&actual_entries, &expected_entries);
510
511        if !mismatches.is_empty() {
512            // Generate a single warning at the TOC region with details
513            let mut details = Vec::new();
514
515            for mismatch in &mismatches {
516                match mismatch {
517                    TocMismatch::StaleEntry { entry } => {
518                        details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
519                    }
520                    TocMismatch::MissingEntry { expected } => {
521                        details.push(format!(
522                            "Missing entry: '{}' (line {})",
523                            expected.text, expected.heading_line
524                        ));
525                    }
526                    TocMismatch::TextMismatch { entry, expected } => {
527                        details.push(format!(
528                            "Text mismatch: TOC has '{}', heading is '{}'",
529                            entry.text, expected.text
530                        ));
531                    }
532                    TocMismatch::OrderMismatch {
533                        entry,
534                        expected_position,
535                    } => {
536                        details.push(format!(
537                            "Order mismatch: '{}' should be at position {}",
538                            entry.text, expected_position
539                        ));
540                    }
541                    TocMismatch::IndentationMismatch {
542                        entry,
543                        actual_indent,
544                        expected_indent,
545                        ..
546                    } => {
547                        details.push(format!(
548                            "Indentation mismatch: '{}' has {} spaces, expected {} spaces",
549                            entry.text, actual_indent, expected_indent
550                        ));
551                    }
552                }
553            }
554
555            let message = format!(
556                "Table of Contents does not match document headings: {}",
557                details.join("; ")
558            );
559
560            // Generate fix: replace entire TOC content
561            let new_toc = self.generate_toc(&expected_entries);
562            let fix_range = region.content_start..region.content_end;
563
564            warnings.push(LintWarning {
565                rule_name: Some(self.name().to_string()),
566                message,
567                line: region.start_line,
568                column: 1,
569                end_line: region.end_line,
570                end_column: 1,
571                severity: Severity::Warning,
572                fix: Some(Fix {
573                    range: fix_range,
574                    replacement: new_toc,
575                }),
576            });
577        }
578
579        Ok(warnings)
580    }
581
582    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
583        // Detect TOC region
584        let Some(region) = self.detect_toc_region(ctx) else {
585            // No TOC found - return unchanged
586            return Ok(ctx.content.to_string());
587        };
588
589        // Build expected TOC from headings
590        let expected_entries = self.build_expected_toc(ctx, &region);
591
592        // Generate new TOC
593        let new_toc = self.generate_toc(&expected_entries);
594
595        // Replace the TOC content
596        let mut result = String::with_capacity(ctx.content.len());
597        result.push_str(&ctx.content[..region.content_start]);
598        result.push_str(&new_toc);
599        result.push_str(&ctx.content[region.content_end..]);
600
601        Ok(result)
602    }
603
604    fn category(&self) -> RuleCategory {
605        RuleCategory::Other
606    }
607
608    fn as_any(&self) -> &dyn std::any::Any {
609        self
610    }
611
612    fn default_config_section(&self) -> Option<(String, toml::Value)> {
613        let value: toml::Value = toml::from_str(
614            r#"
615# Whether this rule is enabled (opt-in, disabled by default)
616enabled = false
617# Minimum heading level to include
618min-level = 2
619# Maximum heading level to include
620max-level = 4
621# Whether TOC order must match document order
622enforce-order = true
623"#,
624        )
625        .ok()?;
626        Some(("MD073".to_string(), value))
627    }
628
629    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
630    where
631        Self: Sized,
632    {
633        let mut rule = MD073TocValidation::default();
634        let mut indent_from_md073 = false;
635
636        if let Some(rule_config) = config.rules.get("MD073") {
637            // Parse enabled (opt-in rule, defaults to false)
638            if let Some(enabled) = rule_config.values.get("enabled").and_then(|v| v.as_bool()) {
639                rule.enabled = enabled;
640            }
641
642            // Parse min-level
643            if let Some(min_level) = rule_config.values.get("min-level").and_then(|v| v.as_integer()) {
644                rule.min_level = (min_level.clamp(1, 6)) as u8;
645            }
646
647            // Parse max-level
648            if let Some(max_level) = rule_config.values.get("max-level").and_then(|v| v.as_integer()) {
649                rule.max_level = (max_level.clamp(1, 6)) as u8;
650            }
651
652            // Parse enforce-order
653            if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(|v| v.as_bool()) {
654                rule.enforce_order = enforce_order;
655            }
656
657            // Parse indent (MD073-specific override)
658            if let Some(indent) = rule_config.values.get("indent").and_then(|v| v.as_integer()) {
659                rule.indent = (indent.clamp(1, 8)) as usize;
660                indent_from_md073 = true;
661            }
662        }
663
664        // If indent not explicitly set in MD073, read from MD007 config
665        if !indent_from_md073
666            && let Some(md007_config) = config.rules.get("MD007")
667            && let Some(indent) = md007_config.values.get("indent").and_then(|v| v.as_integer())
668        {
669            rule.indent = (indent.clamp(1, 8)) as usize;
670        }
671
672        Box::new(rule)
673    }
674}
675
676#[cfg(test)]
677mod tests {
678    use super::*;
679    use crate::config::MarkdownFlavor;
680
681    fn create_ctx(content: &str) -> LintContext<'_> {
682        LintContext::new(content, MarkdownFlavor::Standard, None)
683    }
684
685    /// Create rule with enabled=true for tests that call check() directly
686    fn create_enabled_rule() -> MD073TocValidation {
687        MD073TocValidation {
688            enabled: true,
689            ..MD073TocValidation::default()
690        }
691    }
692
693    // ========== Detection Tests ==========
694
695    #[test]
696    fn test_detect_markers_basic() {
697        let rule = MD073TocValidation::new();
698        let content = r#"# Title
699
700<!-- toc -->
701
702- [Heading 1](#heading-1)
703
704<!-- tocstop -->
705
706## Heading 1
707
708Content here.
709"#;
710        let ctx = create_ctx(content);
711        let region = rule.detect_by_markers(&ctx);
712        assert!(region.is_some());
713        let region = region.unwrap();
714        // Verify region boundaries are detected correctly
715        assert_eq!(region.start_line, 4);
716        assert_eq!(region.end_line, 6);
717    }
718
719    #[test]
720    fn test_detect_markers_variations() {
721        let rule = MD073TocValidation::new();
722
723        // Test <!--toc--> (no spaces)
724        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
725        let ctx1 = create_ctx(content1);
726        assert!(rule.detect_by_markers(&ctx1).is_some());
727
728        // Test <!-- TOC --> (uppercase)
729        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
730        let ctx2 = create_ctx(content2);
731        assert!(rule.detect_by_markers(&ctx2).is_some());
732
733        // Test <!-- /toc --> (alternative stop marker)
734        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
735        let ctx3 = create_ctx(content3);
736        assert!(rule.detect_by_markers(&ctx3).is_some());
737    }
738
739    #[test]
740    fn test_no_toc_region() {
741        let rule = MD073TocValidation::new();
742        let content = r#"# Title
743
744## Heading 1
745
746Content here.
747
748## Heading 2
749
750More content.
751"#;
752        let ctx = create_ctx(content);
753        let region = rule.detect_toc_region(&ctx);
754        assert!(region.is_none());
755    }
756
757    // ========== Validation Tests ==========
758
759    #[test]
760    fn test_toc_matches_headings() {
761        let rule = create_enabled_rule();
762        let content = r#"# Title
763
764<!-- toc -->
765
766- [Heading 1](#heading-1)
767- [Heading 2](#heading-2)
768
769<!-- tocstop -->
770
771## Heading 1
772
773Content.
774
775## Heading 2
776
777More content.
778"#;
779        let ctx = create_ctx(content);
780        let result = rule.check(&ctx).unwrap();
781        assert!(result.is_empty(), "Expected no warnings for matching TOC");
782    }
783
784    #[test]
785    fn test_missing_entry() {
786        let rule = create_enabled_rule();
787        let content = r#"# Title
788
789<!-- toc -->
790
791- [Heading 1](#heading-1)
792
793<!-- tocstop -->
794
795## Heading 1
796
797Content.
798
799## Heading 2
800
801New heading not in TOC.
802"#;
803        let ctx = create_ctx(content);
804        let result = rule.check(&ctx).unwrap();
805        assert_eq!(result.len(), 1);
806        assert!(result[0].message.contains("Missing entry"));
807        assert!(result[0].message.contains("Heading 2"));
808    }
809
810    #[test]
811    fn test_stale_entry() {
812        let rule = create_enabled_rule();
813        let content = r#"# Title
814
815<!-- toc -->
816
817- [Heading 1](#heading-1)
818- [Deleted Heading](#deleted-heading)
819
820<!-- tocstop -->
821
822## Heading 1
823
824Content.
825"#;
826        let ctx = create_ctx(content);
827        let result = rule.check(&ctx).unwrap();
828        assert_eq!(result.len(), 1);
829        assert!(result[0].message.contains("Stale entry"));
830        assert!(result[0].message.contains("Deleted Heading"));
831    }
832
833    #[test]
834    fn test_text_mismatch() {
835        let rule = create_enabled_rule();
836        let content = r#"# Title
837
838<!-- toc -->
839
840- [Old Name](#heading-1)
841
842<!-- tocstop -->
843
844## Heading 1
845
846Content.
847"#;
848        let ctx = create_ctx(content);
849        let result = rule.check(&ctx).unwrap();
850        assert_eq!(result.len(), 1);
851        assert!(result[0].message.contains("Text mismatch"));
852    }
853
854    // ========== Level Filtering Tests ==========
855
856    #[test]
857    fn test_min_level_excludes_h1() {
858        let mut rule = MD073TocValidation::new();
859        rule.min_level = 2;
860
861        let content = r#"<!-- toc -->
862
863<!-- tocstop -->
864
865# Should Be Excluded
866
867## Should Be Included
868
869Content.
870"#;
871        let ctx = create_ctx(content);
872        let region = rule.detect_toc_region(&ctx).unwrap();
873        let expected = rule.build_expected_toc(&ctx, &region);
874
875        assert_eq!(expected.len(), 1);
876        assert_eq!(expected[0].text, "Should Be Included");
877    }
878
879    #[test]
880    fn test_max_level_excludes_h5_h6() {
881        let mut rule = MD073TocValidation::new();
882        rule.max_level = 4;
883
884        let content = r#"<!-- toc -->
885
886<!-- tocstop -->
887
888## Level 2
889
890### Level 3
891
892#### Level 4
893
894##### Level 5 Should Be Excluded
895
896###### Level 6 Should Be Excluded
897"#;
898        let ctx = create_ctx(content);
899        let region = rule.detect_toc_region(&ctx).unwrap();
900        let expected = rule.build_expected_toc(&ctx, &region);
901
902        assert_eq!(expected.len(), 3);
903        assert!(expected.iter().all(|e| e.level <= 4));
904    }
905
906    // ========== Fix Tests ==========
907
908    #[test]
909    fn test_fix_adds_missing_entry() {
910        let rule = MD073TocValidation::new();
911        let content = r#"# Title
912
913<!-- toc -->
914
915- [Heading 1](#heading-1)
916
917<!-- tocstop -->
918
919## Heading 1
920
921Content.
922
923## Heading 2
924
925New heading.
926"#;
927        let ctx = create_ctx(content);
928        let fixed = rule.fix(&ctx).unwrap();
929        assert!(fixed.contains("- [Heading 2](#heading-2)"));
930    }
931
932    #[test]
933    fn test_fix_removes_stale_entry() {
934        let rule = MD073TocValidation::new();
935        let content = r#"# Title
936
937<!-- toc -->
938
939- [Heading 1](#heading-1)
940- [Deleted](#deleted)
941
942<!-- tocstop -->
943
944## Heading 1
945
946Content.
947"#;
948        let ctx = create_ctx(content);
949        let fixed = rule.fix(&ctx).unwrap();
950        assert!(fixed.contains("- [Heading 1](#heading-1)"));
951        assert!(!fixed.contains("Deleted"));
952    }
953
954    #[test]
955    fn test_fix_idempotent() {
956        let rule = MD073TocValidation::new();
957        let content = r#"# Title
958
959<!-- toc -->
960
961- [Heading 1](#heading-1)
962- [Heading 2](#heading-2)
963
964<!-- tocstop -->
965
966## Heading 1
967
968Content.
969
970## Heading 2
971
972More.
973"#;
974        let ctx = create_ctx(content);
975        let fixed1 = rule.fix(&ctx).unwrap();
976        let ctx2 = create_ctx(&fixed1);
977        let fixed2 = rule.fix(&ctx2).unwrap();
978
979        // Second fix should produce same output
980        assert_eq!(fixed1, fixed2);
981    }
982
983    #[test]
984    fn test_fix_preserves_markers() {
985        let rule = MD073TocValidation::new();
986        let content = r#"# Title
987
988<!-- toc -->
989
990Old TOC content.
991
992<!-- tocstop -->
993
994## New Heading
995
996Content.
997"#;
998        let ctx = create_ctx(content);
999        let fixed = rule.fix(&ctx).unwrap();
1000
1001        // Markers should still be present
1002        assert!(fixed.contains("<!-- toc -->"));
1003        assert!(fixed.contains("<!-- tocstop -->"));
1004        // New content should be generated
1005        assert!(fixed.contains("- [New Heading](#new-heading)"));
1006    }
1007
1008    #[test]
1009    fn test_fix_requires_markers() {
1010        let rule = create_enabled_rule();
1011
1012        // Document without markers - no TOC detected, no changes
1013        let content_no_markers = r#"# Title
1014
1015## Heading 1
1016
1017Content.
1018"#;
1019        let ctx = create_ctx(content_no_markers);
1020        let fixed = rule.fix(&ctx).unwrap();
1021        assert_eq!(fixed, content_no_markers);
1022
1023        // Document with markers - TOC detected and fixed
1024        let content_markers = r#"# Title
1025
1026<!-- toc -->
1027
1028- [Old Entry](#old-entry)
1029
1030<!-- tocstop -->
1031
1032## Heading 1
1033
1034Content.
1035"#;
1036        let ctx = create_ctx(content_markers);
1037        let fixed = rule.fix(&ctx).unwrap();
1038        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1039        assert!(!fixed.contains("Old Entry"));
1040    }
1041
1042    // ========== Anchor Tests ==========
1043
1044    #[test]
1045    fn test_duplicate_heading_anchors() {
1046        let rule = MD073TocValidation::new();
1047        let content = r#"# Title
1048
1049<!-- toc -->
1050
1051<!-- tocstop -->
1052
1053## Duplicate
1054
1055Content.
1056
1057## Duplicate
1058
1059More content.
1060
1061## Duplicate
1062
1063Even more.
1064"#;
1065        let ctx = create_ctx(content);
1066        let region = rule.detect_toc_region(&ctx).unwrap();
1067        let expected = rule.build_expected_toc(&ctx, &region);
1068
1069        assert_eq!(expected.len(), 3);
1070        assert_eq!(expected[0].anchor, "duplicate");
1071        assert_eq!(expected[1].anchor, "duplicate-1");
1072        assert_eq!(expected[2].anchor, "duplicate-2");
1073    }
1074
1075    // ========== Edge Cases ==========
1076
1077    #[test]
1078    fn test_headings_in_code_blocks_ignored() {
1079        let rule = create_enabled_rule();
1080        let content = r#"# Title
1081
1082<!-- toc -->
1083
1084- [Real Heading](#real-heading)
1085
1086<!-- tocstop -->
1087
1088## Real Heading
1089
1090```markdown
1091## Fake Heading In Code
1092```
1093
1094Content.
1095"#;
1096        let ctx = create_ctx(content);
1097        let result = rule.check(&ctx).unwrap();
1098        assert!(result.is_empty(), "Should not report fake heading in code block");
1099    }
1100
1101    #[test]
1102    fn test_empty_toc_region() {
1103        let rule = create_enabled_rule();
1104        let content = r#"# Title
1105
1106<!-- toc -->
1107<!-- tocstop -->
1108
1109## Heading 1
1110
1111Content.
1112"#;
1113        let ctx = create_ctx(content);
1114        let result = rule.check(&ctx).unwrap();
1115        assert_eq!(result.len(), 1);
1116        assert!(result[0].message.contains("Missing entry"));
1117    }
1118
1119    #[test]
1120    fn test_nested_indentation() {
1121        let rule = create_enabled_rule();
1122
1123        let content = r#"<!-- toc -->
1124
1125<!-- tocstop -->
1126
1127## Level 2
1128
1129### Level 3
1130
1131#### Level 4
1132
1133## Another Level 2
1134"#;
1135        let ctx = create_ctx(content);
1136        let region = rule.detect_toc_region(&ctx).unwrap();
1137        let expected = rule.build_expected_toc(&ctx, &region);
1138        let toc = rule.generate_toc(&expected);
1139
1140        // Check indentation (always nested)
1141        assert!(toc.contains("- [Level 2](#level-2)"));
1142        assert!(toc.contains("  - [Level 3](#level-3)"));
1143        assert!(toc.contains("    - [Level 4](#level-4)"));
1144        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1145    }
1146
1147    // ========== Indentation Mismatch Tests ==========
1148
1149    #[test]
1150    fn test_indentation_mismatch_detected() {
1151        let rule = create_enabled_rule();
1152        // TOC entries are all at same indentation level, but headings have different levels
1153        let content = r#"<!-- toc -->
1154- [Hello](#hello)
1155- [Another](#another)
1156- [Heading](#heading)
1157<!-- tocstop -->
1158
1159## Hello
1160
1161### Another
1162
1163## Heading
1164"#;
1165        let ctx = create_ctx(content);
1166        let result = rule.check(&ctx).unwrap();
1167        // Should detect indentation mismatch - "Another" is level 3 but has no indent
1168        assert_eq!(result.len(), 1, "Should report indentation mismatch: {result:?}");
1169        assert!(
1170            result[0].message.contains("Indentation mismatch"),
1171            "Message should mention indentation: {}",
1172            result[0].message
1173        );
1174        assert!(
1175            result[0].message.contains("Another"),
1176            "Message should mention the entry: {}",
1177            result[0].message
1178        );
1179    }
1180
1181    #[test]
1182    fn test_indentation_mismatch_fixed() {
1183        let rule = create_enabled_rule();
1184        // TOC entries are all at same indentation level, but headings have different levels
1185        let content = r#"<!-- toc -->
1186- [Hello](#hello)
1187- [Another](#another)
1188- [Heading](#heading)
1189<!-- tocstop -->
1190
1191## Hello
1192
1193### Another
1194
1195## Heading
1196"#;
1197        let ctx = create_ctx(content);
1198        let fixed = rule.fix(&ctx).unwrap();
1199        // After fix, "Another" should be indented
1200        assert!(fixed.contains("- [Hello](#hello)"));
1201        assert!(fixed.contains("  - [Another](#another)")); // Indented with 2 spaces
1202        assert!(fixed.contains("- [Heading](#heading)"));
1203    }
1204
1205    #[test]
1206    fn test_no_indentation_mismatch_when_correct() {
1207        let rule = create_enabled_rule();
1208        // TOC has correct indentation
1209        let content = r#"<!-- toc -->
1210- [Hello](#hello)
1211  - [Another](#another)
1212- [Heading](#heading)
1213<!-- tocstop -->
1214
1215## Hello
1216
1217### Another
1218
1219## Heading
1220"#;
1221        let ctx = create_ctx(content);
1222        let result = rule.check(&ctx).unwrap();
1223        // Should not report any issues - indentation is correct
1224        assert!(result.is_empty(), "Should not report issues: {result:?}");
1225    }
1226
1227    // ========== Order Mismatch Tests ==========
1228
1229    #[test]
1230    fn test_order_mismatch_detected() {
1231        let rule = create_enabled_rule();
1232        let content = r#"# Title
1233
1234<!-- toc -->
1235
1236- [Section B](#section-b)
1237- [Section A](#section-a)
1238
1239<!-- tocstop -->
1240
1241## Section A
1242
1243Content A.
1244
1245## Section B
1246
1247Content B.
1248"#;
1249        let ctx = create_ctx(content);
1250        let result = rule.check(&ctx).unwrap();
1251        // Should detect order mismatch - Section B appears before Section A in TOC
1252        // but Section A comes first in document
1253        assert!(!result.is_empty(), "Should detect order mismatch");
1254    }
1255
1256    #[test]
1257    fn test_order_mismatch_ignored_when_disabled() {
1258        let mut rule = create_enabled_rule();
1259        rule.enforce_order = false;
1260        let content = r#"# Title
1261
1262<!-- toc -->
1263
1264- [Section B](#section-b)
1265- [Section A](#section-a)
1266
1267<!-- tocstop -->
1268
1269## Section A
1270
1271Content A.
1272
1273## Section B
1274
1275Content B.
1276"#;
1277        let ctx = create_ctx(content);
1278        let result = rule.check(&ctx).unwrap();
1279        // With enforce_order=false, order mismatches should be ignored
1280        assert!(result.is_empty(), "Should not report order mismatch when disabled");
1281    }
1282
1283    // ========== Unicode and Special Characters Tests ==========
1284
1285    #[test]
1286    fn test_unicode_headings() {
1287        let rule = create_enabled_rule();
1288        let content = r#"# Title
1289
1290<!-- toc -->
1291
1292- [日本語の見出し](#日本語の見出し)
1293- [Émojis 🎉](#émojis-)
1294
1295<!-- tocstop -->
1296
1297## 日本語の見出し
1298
1299Japanese content.
1300
1301## Émojis 🎉
1302
1303Content with emojis.
1304"#;
1305        let ctx = create_ctx(content);
1306        let result = rule.check(&ctx).unwrap();
1307        // Should handle unicode correctly
1308        assert!(result.is_empty(), "Should handle unicode headings");
1309    }
1310
1311    #[test]
1312    fn test_special_characters_in_headings() {
1313        let rule = create_enabled_rule();
1314        let content = r#"# Title
1315
1316<!-- toc -->
1317
1318- [What's New?](#whats-new)
1319- [C++ Guide](#c-guide)
1320
1321<!-- tocstop -->
1322
1323## What's New?
1324
1325News content.
1326
1327## C++ Guide
1328
1329C++ content.
1330"#;
1331        let ctx = create_ctx(content);
1332        let result = rule.check(&ctx).unwrap();
1333        assert!(result.is_empty(), "Should handle special characters");
1334    }
1335
1336    #[test]
1337    fn test_code_spans_in_headings() {
1338        let rule = create_enabled_rule();
1339        let content = r#"# Title
1340
1341<!-- toc -->
1342
1343- [`check [PATHS...]`](#check-paths)
1344
1345<!-- tocstop -->
1346
1347## `check [PATHS...]`
1348
1349Command documentation.
1350"#;
1351        let ctx = create_ctx(content);
1352        let result = rule.check(&ctx).unwrap();
1353        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
1354    }
1355
1356    // ========== Config Tests ==========
1357
1358    #[test]
1359    fn test_from_config_defaults() {
1360        let config = crate::config::Config::default();
1361        let rule = MD073TocValidation::from_config(&config);
1362        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1363
1364        assert_eq!(rule.min_level, 2);
1365        assert_eq!(rule.max_level, 4);
1366        assert!(rule.enforce_order);
1367        assert_eq!(rule.indent, 2);
1368    }
1369
1370    #[test]
1371    fn test_indent_from_md007_config() {
1372        use crate::config::{Config, RuleConfig};
1373        use std::collections::BTreeMap;
1374
1375        let mut config = Config::default();
1376
1377        // Set MD007 indent to 4
1378        let mut md007_values = BTreeMap::new();
1379        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1380        config.rules.insert(
1381            "MD007".to_string(),
1382            RuleConfig {
1383                severity: None,
1384                values: md007_values,
1385            },
1386        );
1387
1388        let rule = MD073TocValidation::from_config(&config);
1389        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1390
1391        assert_eq!(rule.indent, 4, "Should read indent from MD007 config");
1392    }
1393
1394    #[test]
1395    fn test_indent_md073_overrides_md007() {
1396        use crate::config::{Config, RuleConfig};
1397        use std::collections::BTreeMap;
1398
1399        let mut config = Config::default();
1400
1401        // Set MD007 indent to 4
1402        let mut md007_values = BTreeMap::new();
1403        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1404        config.rules.insert(
1405            "MD007".to_string(),
1406            RuleConfig {
1407                severity: None,
1408                values: md007_values,
1409            },
1410        );
1411
1412        // Set MD073 indent to 3 (should override MD007)
1413        let mut md073_values = BTreeMap::new();
1414        md073_values.insert("enabled".to_string(), toml::Value::Boolean(true));
1415        md073_values.insert("indent".to_string(), toml::Value::Integer(3));
1416        config.rules.insert(
1417            "MD073".to_string(),
1418            RuleConfig {
1419                severity: None,
1420                values: md073_values,
1421            },
1422        );
1423
1424        let rule = MD073TocValidation::from_config(&config);
1425        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1426
1427        assert_eq!(rule.indent, 3, "MD073 indent should override MD007");
1428    }
1429
1430    #[test]
1431    fn test_generate_toc_with_4_space_indent() {
1432        let mut rule = create_enabled_rule();
1433        rule.indent = 4;
1434
1435        let content = r#"<!-- toc -->
1436
1437<!-- tocstop -->
1438
1439## Level 2
1440
1441### Level 3
1442
1443#### Level 4
1444
1445## Another Level 2
1446"#;
1447        let ctx = create_ctx(content);
1448        let region = rule.detect_toc_region(&ctx).unwrap();
1449        let expected = rule.build_expected_toc(&ctx, &region);
1450        let toc = rule.generate_toc(&expected);
1451
1452        // With 4-space indent:
1453        // Level 2 = 0 spaces (base level)
1454        // Level 3 = 4 spaces
1455        // Level 4 = 8 spaces
1456        assert!(toc.contains("- [Level 2](#level-2)"), "Level 2 should have no indent");
1457        assert!(
1458            toc.contains("    - [Level 3](#level-3)"),
1459            "Level 3 should have 4-space indent"
1460        );
1461        assert!(
1462            toc.contains("        - [Level 4](#level-4)"),
1463            "Level 4 should have 8-space indent"
1464        );
1465        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1466    }
1467
1468    #[test]
1469    fn test_validate_toc_with_4_space_indent() {
1470        let mut rule = create_enabled_rule();
1471        rule.indent = 4;
1472
1473        // TOC with correct 4-space indentation
1474        let content = r#"<!-- toc -->
1475- [Hello](#hello)
1476    - [Another](#another)
1477- [Heading](#heading)
1478<!-- tocstop -->
1479
1480## Hello
1481
1482### Another
1483
1484## Heading
1485"#;
1486        let ctx = create_ctx(content);
1487        let result = rule.check(&ctx).unwrap();
1488        assert!(
1489            result.is_empty(),
1490            "Should accept 4-space indent when configured: {result:?}"
1491        );
1492    }
1493
1494    #[test]
1495    fn test_validate_toc_wrong_indent_with_4_space_config() {
1496        let mut rule = create_enabled_rule();
1497        rule.indent = 4;
1498
1499        // TOC with 2-space indentation (wrong when 4-space is configured)
1500        let content = r#"<!-- toc -->
1501- [Hello](#hello)
1502  - [Another](#another)
1503- [Heading](#heading)
1504<!-- tocstop -->
1505
1506## Hello
1507
1508### Another
1509
1510## Heading
1511"#;
1512        let ctx = create_ctx(content);
1513        let result = rule.check(&ctx).unwrap();
1514        assert_eq!(result.len(), 1, "Should detect wrong indent");
1515        assert!(
1516            result[0].message.contains("Indentation mismatch"),
1517            "Should report indentation mismatch: {}",
1518            result[0].message
1519        );
1520        assert!(
1521            result[0].message.contains("expected 4 spaces"),
1522            "Should mention expected 4 spaces: {}",
1523            result[0].message
1524        );
1525    }
1526
1527    // ========== Markdown Stripping Tests ==========
1528
1529    #[test]
1530    fn test_strip_markdown_formatting_link() {
1531        let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1532        assert_eq!(result, "Tool: terminal");
1533    }
1534
1535    #[test]
1536    fn test_strip_markdown_formatting_bold() {
1537        let result = strip_markdown_formatting("This is **bold** text");
1538        assert_eq!(result, "This is bold text");
1539
1540        let result = strip_markdown_formatting("This is __bold__ text");
1541        assert_eq!(result, "This is bold text");
1542    }
1543
1544    #[test]
1545    fn test_strip_markdown_formatting_italic() {
1546        let result = strip_markdown_formatting("This is *italic* text");
1547        assert_eq!(result, "This is italic text");
1548
1549        let result = strip_markdown_formatting("This is _italic_ text");
1550        assert_eq!(result, "This is italic text");
1551    }
1552
1553    #[test]
1554    fn test_strip_markdown_formatting_code_span() {
1555        let result = strip_markdown_formatting("Use the `format` function");
1556        assert_eq!(result, "Use the format function");
1557    }
1558
1559    #[test]
1560    fn test_strip_markdown_formatting_image() {
1561        let result = strip_markdown_formatting("See ![logo](image.png) for details");
1562        assert_eq!(result, "See logo for details");
1563    }
1564
1565    #[test]
1566    fn test_strip_markdown_formatting_reference_link() {
1567        let result = strip_markdown_formatting("See [documentation][docs] for details");
1568        assert_eq!(result, "See documentation for details");
1569    }
1570
1571    #[test]
1572    fn test_strip_markdown_formatting_combined() {
1573        // Link is stripped first, leaving bold, then bold is stripped
1574        let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1575        assert_eq!(result, "Tool: terminal");
1576    }
1577
1578    #[test]
1579    fn test_toc_with_link_in_heading_matches_stripped_text() {
1580        let rule = create_enabled_rule();
1581
1582        // TOC entry text matches the stripped heading text
1583        let content = r#"# Title
1584
1585<!-- toc -->
1586
1587- [Tool: terminal](#tool-terminal)
1588
1589<!-- tocstop -->
1590
1591## Tool: [terminal](https://example.com)
1592
1593Content here.
1594"#;
1595        let ctx = create_ctx(content);
1596        let result = rule.check(&ctx).unwrap();
1597        assert!(
1598            result.is_empty(),
1599            "Stripped heading text should match TOC entry: {result:?}"
1600        );
1601    }
1602
1603    #[test]
1604    fn test_toc_with_simplified_text_still_mismatches() {
1605        let rule = create_enabled_rule();
1606
1607        // TOC entry "terminal" does NOT match stripped heading "Tool: terminal"
1608        let content = r#"# Title
1609
1610<!-- toc -->
1611
1612- [terminal](#tool-terminal)
1613
1614<!-- tocstop -->
1615
1616## Tool: [terminal](https://example.com)
1617
1618Content here.
1619"#;
1620        let ctx = create_ctx(content);
1621        let result = rule.check(&ctx).unwrap();
1622        assert_eq!(result.len(), 1, "Should report text mismatch");
1623        assert!(result[0].message.contains("Text mismatch"));
1624    }
1625
1626    #[test]
1627    fn test_fix_generates_stripped_toc_entries() {
1628        let rule = MD073TocValidation::new();
1629        let content = r#"# Title
1630
1631<!-- toc -->
1632
1633<!-- tocstop -->
1634
1635## Tool: [busybox](https://www.busybox.net/)
1636
1637Content.
1638
1639## Tool: [mount](https://en.wikipedia.org/wiki/Mount)
1640
1641More content.
1642"#;
1643        let ctx = create_ctx(content);
1644        let fixed = rule.fix(&ctx).unwrap();
1645
1646        // Generated TOC should have stripped text (links removed)
1647        assert!(
1648            fixed.contains("- [Tool: busybox](#tool-busybox)"),
1649            "TOC entry should have stripped link text"
1650        );
1651        assert!(
1652            fixed.contains("- [Tool: mount](#tool-mount)"),
1653            "TOC entry should have stripped link text"
1654        );
1655        // TOC entries should NOT contain the URL (the actual headings in the document still will)
1656        // Check only within the TOC region (between toc markers)
1657        let toc_start = fixed.find("<!-- toc -->").unwrap();
1658        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
1659        let toc_content = &fixed[toc_start..toc_end];
1660        assert!(
1661            !toc_content.contains("busybox.net"),
1662            "TOC should not contain URLs: {toc_content}"
1663        );
1664        assert!(
1665            !toc_content.contains("wikipedia.org"),
1666            "TOC should not contain URLs: {toc_content}"
1667        );
1668    }
1669
1670    #[test]
1671    fn test_fix_with_bold_in_heading() {
1672        let rule = MD073TocValidation::new();
1673        let content = r#"# Title
1674
1675<!-- toc -->
1676
1677<!-- tocstop -->
1678
1679## **Important** Section
1680
1681Content.
1682"#;
1683        let ctx = create_ctx(content);
1684        let fixed = rule.fix(&ctx).unwrap();
1685
1686        // Generated TOC should have stripped text (bold markers removed)
1687        assert!(fixed.contains("- [Important Section](#important-section)"));
1688    }
1689
1690    #[test]
1691    fn test_fix_with_code_in_heading() {
1692        let rule = MD073TocValidation::new();
1693        let content = r#"# Title
1694
1695<!-- toc -->
1696
1697<!-- tocstop -->
1698
1699## Using `async` Functions
1700
1701Content.
1702"#;
1703        let ctx = create_ctx(content);
1704        let fixed = rule.fix(&ctx).unwrap();
1705
1706        // Generated TOC should have stripped text (backticks removed)
1707        assert!(fixed.contains("- [Using async Functions](#using-async-functions)"));
1708    }
1709
1710    // ========== Custom Anchor Tests ==========
1711
1712    #[test]
1713    fn test_custom_anchor_id_respected() {
1714        let rule = create_enabled_rule();
1715        let content = r#"# Title
1716
1717<!-- toc -->
1718
1719- [My Section](#my-custom-anchor)
1720
1721<!-- tocstop -->
1722
1723## My Section {#my-custom-anchor}
1724
1725Content here.
1726"#;
1727        let ctx = create_ctx(content);
1728        let result = rule.check(&ctx).unwrap();
1729        assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
1730    }
1731
1732    #[test]
1733    fn test_custom_anchor_id_in_generated_toc() {
1734        let rule = create_enabled_rule();
1735        let content = r#"# Title
1736
1737<!-- toc -->
1738
1739<!-- tocstop -->
1740
1741## First Section {#custom-first}
1742
1743Content.
1744
1745## Second Section {#another-custom}
1746
1747More content.
1748"#;
1749        let ctx = create_ctx(content);
1750        let fixed = rule.fix(&ctx).unwrap();
1751        assert!(fixed.contains("- [First Section](#custom-first)"));
1752        assert!(fixed.contains("- [Second Section](#another-custom)"));
1753    }
1754
1755    #[test]
1756    fn test_mixed_custom_and_generated_anchors() {
1757        let rule = create_enabled_rule();
1758        let content = r#"# Title
1759
1760<!-- toc -->
1761
1762- [Custom Section](#my-id)
1763- [Normal Section](#normal-section)
1764
1765<!-- tocstop -->
1766
1767## Custom Section {#my-id}
1768
1769Content.
1770
1771## Normal Section
1772
1773More content.
1774"#;
1775        let ctx = create_ctx(content);
1776        let result = rule.check(&ctx).unwrap();
1777        assert!(result.is_empty(), "Should handle mixed custom and generated anchors");
1778    }
1779
1780    // ========== Anchor Generation Tests ==========
1781
1782    #[test]
1783    fn test_github_anchor_style() {
1784        let rule = create_enabled_rule();
1785
1786        let content = r#"<!-- toc -->
1787
1788<!-- tocstop -->
1789
1790## Test_With_Underscores
1791
1792Content.
1793"#;
1794        let ctx = create_ctx(content);
1795        let region = rule.detect_toc_region(&ctx).unwrap();
1796        let expected = rule.build_expected_toc(&ctx, &region);
1797
1798        // GitHub-style anchors preserve underscores
1799        assert_eq!(expected[0].anchor, "test_with_underscores");
1800    }
1801
1802    // ========== Stress Tests ==========
1803
1804    #[test]
1805    fn test_stress_many_headings() {
1806        let rule = create_enabled_rule();
1807
1808        // Generate a document with 150 headings
1809        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1810
1811        for i in 1..=150 {
1812            content.push_str(&format!("## Heading Number {i}\n\nContent for section {i}.\n\n"));
1813        }
1814
1815        let ctx = create_ctx(&content);
1816
1817        // Should not panic or timeout
1818        let result = rule.check(&ctx).unwrap();
1819
1820        // Should report missing entries for all 150 headings
1821        assert_eq!(result.len(), 1, "Should report single warning for TOC");
1822        assert!(result[0].message.contains("Missing entry"));
1823
1824        // Fix should generate TOC with 150 entries
1825        let fixed = rule.fix(&ctx).unwrap();
1826        assert!(fixed.contains("- [Heading Number 1](#heading-number-1)"));
1827        assert!(fixed.contains("- [Heading Number 100](#heading-number-100)"));
1828        assert!(fixed.contains("- [Heading Number 150](#heading-number-150)"));
1829    }
1830
1831    #[test]
1832    fn test_stress_deeply_nested() {
1833        let rule = create_enabled_rule();
1834        let content = r#"# Title
1835
1836<!-- toc -->
1837
1838<!-- tocstop -->
1839
1840## Level 2 A
1841
1842### Level 3 A
1843
1844#### Level 4 A
1845
1846## Level 2 B
1847
1848### Level 3 B
1849
1850#### Level 4 B
1851
1852## Level 2 C
1853
1854### Level 3 C
1855
1856#### Level 4 C
1857
1858## Level 2 D
1859
1860### Level 3 D
1861
1862#### Level 4 D
1863"#;
1864        let ctx = create_ctx(content);
1865        let fixed = rule.fix(&ctx).unwrap();
1866
1867        // Check nested indentation is correct
1868        assert!(fixed.contains("- [Level 2 A](#level-2-a)"));
1869        assert!(fixed.contains("  - [Level 3 A](#level-3-a)"));
1870        assert!(fixed.contains("    - [Level 4 A](#level-4-a)"));
1871        assert!(fixed.contains("- [Level 2 D](#level-2-d)"));
1872        assert!(fixed.contains("  - [Level 3 D](#level-3-d)"));
1873        assert!(fixed.contains("    - [Level 4 D](#level-4-d)"));
1874    }
1875
1876    #[test]
1877    fn test_stress_many_duplicates() {
1878        let rule = create_enabled_rule();
1879
1880        // Generate 50 headings with the same text
1881        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1882        for _ in 0..50 {
1883            content.push_str("## FAQ\n\nContent.\n\n");
1884        }
1885
1886        let ctx = create_ctx(&content);
1887        let region = rule.detect_toc_region(&ctx).unwrap();
1888        let expected = rule.build_expected_toc(&ctx, &region);
1889
1890        // Should generate unique anchors for all 50
1891        assert_eq!(expected.len(), 50);
1892        assert_eq!(expected[0].anchor, "faq");
1893        assert_eq!(expected[1].anchor, "faq-1");
1894        assert_eq!(expected[49].anchor, "faq-49");
1895    }
1896}