Skip to main content

rumdl_lib/rules/
md073_toc_validation.rs

1//! MD073: Table of Contents validation rule
2//!
3//! Validates that TOC sections match the actual document headings.
4
5use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
/// Regex for the TOC start marker: `<!-- toc -->`.
///
/// Matching is case-insensitive and allows any amount of whitespace (including none)
/// around the word `toc`, so `<!--toc-->` and `<!-- TOC -->` also match.
/// The pattern is unanchored: it matches anywhere within the text it is applied to.
static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());
14
/// Regex for the TOC stop marker: `<!-- tocstop -->` or `<!-- /toc -->`.
///
/// Matching is case-insensitive and allows any amount of whitespace (including none)
/// around the keyword. The pattern is unanchored.
static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());
17
/// Regex for extracting TOC entries: `- [text](#anchor)` or `* [text](#anchor)`,
/// with optional leading whitespace for nested items.
///
/// Capture groups:
/// 1. the leading whitespace before the bullet (used to measure indentation),
/// 2. the link text, which may contain one level of nested brackets,
///    e.g. `[`check [PATHS...]`](#check-paths)`,
/// 3. the anchor, i.e. everything between `(#` and the next `)`.
///
/// The pattern is anchored at the start of the line only; trailing text after the
/// closing `)` is ignored.
static TOC_ENTRY_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// Represents a detected TOC region in the document: the lines strictly between a
/// start marker and the following stop marker.
#[derive(Debug, Clone)]
struct TocRegion {
    /// 1-indexed line of the first TOC content line (the line after the start marker)
    start_line: usize,
    /// 1-indexed line of the last TOC content line (the line before the stop marker).
    /// When there is no content between the markers this equals `start_line`.
    end_line: usize,
    /// Byte offset where TOC content starts (`byte_offset` of the line after the start marker)
    content_start: usize,
    /// Byte offset where TOC content ends (`byte_offset` of the stop-marker line, exclusive).
    /// Equals `content_start` when the region is empty.
    content_end: usize,
}
36
/// A parsed TOC entry from the existing TOC (one list item of the form
/// `- [text](#anchor)` found inside the TOC region).
#[derive(Debug, Clone)]
struct TocEntry {
    /// Display text of the link, exactly as written between the brackets
    text: String,
    /// Anchor/fragment (without the leading `#`)
    anchor: String,
    /// Length in bytes of the leading whitespace before the bullet (for indentation checking).
    /// A tab therefore counts as 1.
    indent_spaces: usize,
}
47
/// An expected TOC entry generated from a document heading.
#[derive(Debug, Clone)]
struct ExpectedTocEntry {
    /// 1-indexed line number of the heading
    heading_line: usize,
    /// Heading level (1-6)
    level: u8,
    /// Heading text as stored on the heading (markdown formatting is not yet stripped)
    text: String,
    /// Anchor for the heading: its custom ID if present, otherwise a generated
    /// GitHub-style fragment; repeated anchors get a `-N` suffix
    anchor: String,
}
60
/// Types of mismatches between the actual TOC and the TOC expected from the headings.
/// Entries are matched to headings by anchor.
#[derive(Debug)]
enum TocMismatch {
    /// Entry exists in TOC but no heading produces its anchor
    StaleEntry { entry: TocEntry },
    /// Heading exists but no TOC entry uses its anchor
    MissingEntry { expected: ExpectedTocEntry },
    /// TOC entry text doesn't match heading text (compared after stripping markdown formatting)
    TextMismatch {
        entry: TocEntry,
        expected: ExpectedTocEntry,
    },
    /// TOC entries are in wrong order; `expected_position` is the 1-based position
    /// of the entry's heading among the expected entries
    OrderMismatch { entry: TocEntry, expected_position: usize },
    /// TOC entry has wrong indentation level
    IndentationMismatch {
        entry: TocEntry,
        /// Indentation found on the TOC line
        actual_indent: usize,
        /// Indentation derived from the heading level and the configured indent size
        expected_indent: usize,
    },
}
82
/// Regex patterns for stripping markdown formatting from heading text.
/// In every pattern the text to keep is a capture group, so the markup can be
/// dropped by replacing a match with its group(s).
///
/// Inline link `[text](url)`; group 1 is the link text.
static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
/// Reference link `[text][ref]` (the reference may be empty); group 1 is the link text.
static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
/// Image `![alt](url)`; group 1 is the (possibly empty) alt text.
static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
/// Code span delimited by single backticks; group 1 is the code.
static MARKDOWN_CODE_SPAN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`([^`]+)`").unwrap());
/// Bold `**text**`; group 1 is the text (which may not contain `*`).
static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
/// Bold `__text__`; group 1 is the text (which may not contain `_`).
static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
/// Italic `*text*`; group 1 is the text (which may not contain `*`).
static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
// Underscore italic `_text_`, only when delimited on both sides by the start/end of
// the text or by a character that is not an ASCII letter or digit (so `snake_case_name`
// is left alone). Groups 1 and 3 capture those boundary characters and group 2 the
// content; because the boundaries are part of the match they must be re-emitted by
// the replacement.
// Handles: "_text_", " _text_ ", "start _text_", "_text_ end"
static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
95
96/// Strip markdown formatting from text, preserving plain text content.
97/// Used for TOC entry display text.
98///
99/// Examples:
100/// - `[terminal](url)` → `terminal`
101/// - `**bold**` → `bold`
102/// - `` `code` `` → `code`
103/// - `Tool: [terminal](url)` → `Tool: terminal`
104fn strip_markdown_formatting(text: &str) -> String {
105    let mut result = text.to_string();
106
107    // Strip images first (before links, since images use similar syntax)
108    result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
109
110    // Strip links: [text](url) → text
111    result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
112
113    // Strip reference links: [text][ref] → text
114    result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
115
116    // Strip code spans: `code` → code
117    result = MARKDOWN_CODE_SPAN.replace_all(&result, "$1").to_string();
118
119    // Strip bold (do double before single to handle nested)
120    result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
121    result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
122
123    // Strip italic
124    result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
125    // Underscore italic: preserve boundary chars, extract content
126    result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
127
128    result
129}
130
/// MD073: Table of Contents Validation
///
/// This rule validates that TOC sections match the actual document headings.
/// It detects TOC regions via markers (`<!-- toc -->...<!-- tocstop -->`);
/// `<!-- /toc -->` is accepted as an alternative stop marker.
///
/// To opt into TOC validation, add markers to your document:
/// ```markdown
/// <!-- toc -->
/// - [Section](#section)
/// <!-- tocstop -->
/// ```
///
/// ## Configuration
///
/// ```toml
/// [MD073]
/// # Enable the rule (opt-in, disabled by default)
/// enabled = true
/// # Minimum heading level to include (default: 2)
/// min-level = 2
/// # Maximum heading level to include (default: 4)
/// max-level = 4
/// # Whether TOC order must match document order (default: true)
/// enforce-order = true
/// # Indent size per nesting level (default: from MD007 config, or 2)
/// indent = 2
/// ```
#[derive(Clone)]
pub struct MD073TocValidation {
    /// Whether this rule is enabled (default: false - opt-in rule).
    /// NOTE(review): the rule's own methods in this file store this flag but do not
    /// read it; presumably the caller uses the config value to gate the rule - confirm.
    enabled: bool,
    /// Minimum heading level to include (inclusive)
    min_level: u8,
    /// Maximum heading level to include (inclusive)
    max_level: u8,
    /// Whether to enforce order matching
    enforce_order: bool,
    /// Indent size per nesting level (reads from MD007 config by default)
    pub indent: usize,
}
171
172impl Default for MD073TocValidation {
173    fn default() -> Self {
174        Self {
175            enabled: false, // Disabled by default - opt-in rule
176            min_level: 2,
177            max_level: 4,
178            enforce_order: true,
179            indent: 2, // Default indent, can be overridden by MD007 config
180        }
181    }
182}
183
184impl std::fmt::Debug for MD073TocValidation {
185    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
186        f.debug_struct("MD073TocValidation")
187            .field("enabled", &self.enabled)
188            .field("min_level", &self.min_level)
189            .field("max_level", &self.max_level)
190            .field("enforce_order", &self.enforce_order)
191            .field("indent", &self.indent)
192            .finish()
193    }
194}
195
196impl MD073TocValidation {
197    /// Create a new rule with default settings
198    pub fn new() -> Self {
199        Self::default()
200    }
201
202    /// Detect TOC region using markers
203    fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
204        let mut start_line = None;
205        let mut start_byte = None;
206
207        for (idx, line_info) in ctx.lines.iter().enumerate() {
208            let line_num = idx + 1;
209            let content = line_info.content(ctx.content);
210
211            // Skip if in code block or front matter
212            if line_info.in_code_block || line_info.in_front_matter {
213                continue;
214            }
215
216            // Look for start marker or stop marker
217            if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
218                // We have a start, now look for stop marker
219                if TOC_STOP_MARKER.is_match(content) {
220                    let end_line = line_num - 1;
221                    let content_end = line_info.byte_offset;
222
223                    // Handle case where there's no content between markers
224                    if end_line < s_line {
225                        return Some(TocRegion {
226                            start_line: s_line,
227                            end_line: s_line,
228                            content_start: s_byte,
229                            content_end: s_byte,
230                        });
231                    }
232
233                    return Some(TocRegion {
234                        start_line: s_line,
235                        end_line,
236                        content_start: s_byte,
237                        content_end,
238                    });
239                }
240            } else if TOC_START_MARKER.is_match(content) {
241                // TOC content starts on the next line
242                if idx + 1 < ctx.lines.len() {
243                    start_line = Some(line_num + 1);
244                    start_byte = Some(ctx.lines[idx + 1].byte_offset);
245                }
246            }
247        }
248
249        None
250    }
251
252    /// Detect TOC region using markers
253    fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
254        self.detect_by_markers(ctx)
255    }
256
257    /// Extract TOC entries from the detected region
258    fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
259        let mut entries = Vec::new();
260
261        for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
262            let line_info = &ctx.lines[idx];
263            let content = line_info.content(ctx.content);
264
265            if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
266                let indent_spaces = caps.get(1).map_or(0, |m| m.as_str().len());
267                let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
268                let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
269
270                entries.push(TocEntry {
271                    text,
272                    anchor,
273                    indent_spaces,
274                });
275            }
276        }
277
278        entries
279    }
280
281    /// Build expected TOC entries from document headings
282    fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
283        let mut entries = Vec::new();
284        let mut fragment_counts: HashMap<String, usize> = HashMap::new();
285
286        for (idx, line_info) in ctx.lines.iter().enumerate() {
287            let line_num = idx + 1;
288
289            // Skip headings before/within the TOC region
290            if line_num <= toc_region.end_line {
291                // Also skip the TOC heading itself for heading-based detection
292                continue;
293            }
294
295            // Skip code blocks, front matter, HTML blocks
296            if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
297                continue;
298            }
299
300            if let Some(heading) = &line_info.heading {
301                // Filter by min/max level
302                if heading.level < self.min_level || heading.level > self.max_level {
303                    continue;
304                }
305
306                // Use custom ID if available, otherwise generate GitHub-style anchor
307                let base_anchor = if let Some(custom_id) = &heading.custom_id {
308                    custom_id.clone()
309                } else {
310                    AnchorStyle::GitHub.generate_fragment(&heading.text)
311                };
312
313                // Handle duplicate anchors
314                let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
315                    let suffix = *count;
316                    *count += 1;
317                    format!("{base_anchor}-{suffix}")
318                } else {
319                    fragment_counts.insert(base_anchor.clone(), 1);
320                    base_anchor
321                };
322
323                entries.push(ExpectedTocEntry {
324                    heading_line: line_num,
325                    level: heading.level,
326                    text: heading.text.clone(),
327                    anchor,
328                });
329            }
330        }
331
332        entries
333    }
334
335    /// Compare actual TOC entries against expected and find mismatches
336    fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
337        let mut mismatches = Vec::new();
338
339        // Build a map of expected anchors
340        let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
341            expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
342
343        // Build a map of actual anchors
344        let actual_anchors: HashMap<&str, &TocEntry> = actual.iter().map(|e| (e.anchor.as_str(), e)).collect();
345
346        // Check for stale entries (in TOC but not in expected)
347        for entry in actual {
348            if !expected_anchors.contains_key(entry.anchor.as_str()) {
349                mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
350            }
351        }
352
353        // Check for missing entries (in expected but not in TOC)
354        for exp in expected {
355            if !actual_anchors.contains_key(exp.anchor.as_str()) {
356                mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
357            }
358        }
359
360        // Check for text mismatches (compare stripped versions)
361        for entry in actual {
362            if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
363                // Compare stripped text (removes markdown formatting like links, emphasis)
364                let actual_stripped = strip_markdown_formatting(entry.text.trim());
365                let expected_stripped = strip_markdown_formatting(exp.text.trim());
366                if actual_stripped != expected_stripped {
367                    mismatches.push(TocMismatch::TextMismatch {
368                        entry: entry.clone(),
369                        expected: (*exp).clone(),
370                    });
371                }
372            }
373        }
374
375        // Check for indentation mismatches
376        // Expected indentation is indent spaces per level difference from base level
377        if !expected.is_empty() {
378            let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
379
380            for entry in actual {
381                if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
382                    let level_diff = exp.level.saturating_sub(base_level) as usize;
383                    let expected_indent = level_diff * self.indent;
384
385                    if entry.indent_spaces != expected_indent {
386                        // Don't report indentation mismatch if already reported as text mismatch
387                        let already_reported = mismatches.iter().any(|m| match m {
388                            TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
389                            TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
390                            _ => false,
391                        });
392                        if !already_reported {
393                            mismatches.push(TocMismatch::IndentationMismatch {
394                                entry: entry.clone(),
395                                actual_indent: entry.indent_spaces,
396                                expected_indent,
397                            });
398                        }
399                    }
400                }
401            }
402        }
403
404        // Check order if enforce_order is enabled
405        if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
406            let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
407
408            // Find entries that exist in both but are out of order
409            let mut expected_idx = 0;
410            for entry in actual {
411                // Skip entries that don't exist in expected
412                if !expected_anchors.contains_key(entry.anchor.as_str()) {
413                    continue;
414                }
415
416                // Find where this anchor should be
417                while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
418                    expected_idx += 1;
419                }
420
421                if expected_idx >= expected_order.len() {
422                    // This entry is after where it should be
423                    let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
424                    // Only add order mismatch if not already reported as stale/text mismatch
425                    let already_reported = mismatches.iter().any(|m| match m {
426                        TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
427                        TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
428                        _ => false,
429                    });
430                    if !already_reported {
431                        mismatches.push(TocMismatch::OrderMismatch {
432                            entry: entry.clone(),
433                            expected_position: correct_pos + 1,
434                        });
435                    }
436                } else {
437                    expected_idx += 1;
438                }
439            }
440        }
441
442        mismatches
443    }
444
445    /// Generate a new TOC from expected entries (always uses nested indentation)
446    fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
447        if expected.is_empty() {
448            return String::new();
449        }
450
451        let mut result = String::new();
452        let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
453        let indent_str = " ".repeat(self.indent);
454
455        for entry in expected {
456            let level_diff = entry.level.saturating_sub(base_level) as usize;
457            let indent = indent_str.repeat(level_diff);
458
459            // Strip markdown formatting from heading text for clean TOC entries
460            let display_text = strip_markdown_formatting(&entry.text);
461            result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
462        }
463
464        result
465    }
466}
467
468impl Rule for MD073TocValidation {
469    fn name(&self) -> &'static str {
470        "MD073"
471    }
472
473    fn description(&self) -> &'static str {
474        "Table of Contents should match document headings"
475    }
476
477    fn should_skip(&self, ctx: &LintContext) -> bool {
478        // Quick check: skip if no TOC markers
479        let has_toc_marker = ctx.content.contains("<!-- toc") || ctx.content.contains("<!--toc");
480        !has_toc_marker
481    }
482
483    fn check(&self, ctx: &LintContext) -> LintResult {
484        let mut warnings = Vec::new();
485
486        // Detect TOC region
487        let Some(region) = self.detect_toc_region(ctx) else {
488            // No TOC found - nothing to validate
489            return Ok(warnings);
490        };
491
492        // Extract actual TOC entries
493        let actual_entries = self.extract_toc_entries(ctx, &region);
494
495        // Build expected TOC from headings
496        let expected_entries = self.build_expected_toc(ctx, &region);
497
498        // If no expected entries and no actual entries, nothing to validate
499        if expected_entries.is_empty() && actual_entries.is_empty() {
500            return Ok(warnings);
501        }
502
503        // Validate
504        let mismatches = self.validate_toc(&actual_entries, &expected_entries);
505
506        if !mismatches.is_empty() {
507            // Generate a single warning at the TOC region with details
508            let mut details = Vec::new();
509
510            for mismatch in &mismatches {
511                match mismatch {
512                    TocMismatch::StaleEntry { entry } => {
513                        details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
514                    }
515                    TocMismatch::MissingEntry { expected } => {
516                        details.push(format!(
517                            "Missing entry: '{}' (line {})",
518                            expected.text, expected.heading_line
519                        ));
520                    }
521                    TocMismatch::TextMismatch { entry, expected } => {
522                        details.push(format!(
523                            "Text mismatch: TOC has '{}', heading is '{}'",
524                            entry.text, expected.text
525                        ));
526                    }
527                    TocMismatch::OrderMismatch {
528                        entry,
529                        expected_position,
530                    } => {
531                        details.push(format!(
532                            "Order mismatch: '{}' should be at position {}",
533                            entry.text, expected_position
534                        ));
535                    }
536                    TocMismatch::IndentationMismatch {
537                        entry,
538                        actual_indent,
539                        expected_indent,
540                        ..
541                    } => {
542                        details.push(format!(
543                            "Indentation mismatch: '{}' has {} spaces, expected {} spaces",
544                            entry.text, actual_indent, expected_indent
545                        ));
546                    }
547                }
548            }
549
550            let message = format!(
551                "Table of Contents does not match document headings: {}",
552                details.join("; ")
553            );
554
555            // Generate fix: replace entire TOC content
556            let new_toc = self.generate_toc(&expected_entries);
557            let fix_range = region.content_start..region.content_end;
558
559            warnings.push(LintWarning {
560                rule_name: Some(self.name().to_string()),
561                message,
562                line: region.start_line,
563                column: 1,
564                end_line: region.end_line,
565                end_column: 1,
566                severity: Severity::Warning,
567                fix: Some(Fix {
568                    range: fix_range,
569                    replacement: new_toc,
570                }),
571            });
572        }
573
574        Ok(warnings)
575    }
576
577    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
578        // Detect TOC region
579        let Some(region) = self.detect_toc_region(ctx) else {
580            // No TOC found - return unchanged
581            return Ok(ctx.content.to_string());
582        };
583
584        // Build expected TOC from headings
585        let expected_entries = self.build_expected_toc(ctx, &region);
586
587        // Generate new TOC
588        let new_toc = self.generate_toc(&expected_entries);
589
590        // Replace the TOC content
591        let mut result = String::with_capacity(ctx.content.len());
592        result.push_str(&ctx.content[..region.content_start]);
593        result.push_str(&new_toc);
594        result.push_str(&ctx.content[region.content_end..]);
595
596        Ok(result)
597    }
598
599    fn category(&self) -> RuleCategory {
600        RuleCategory::Other
601    }
602
603    fn as_any(&self) -> &dyn std::any::Any {
604        self
605    }
606
607    fn default_config_section(&self) -> Option<(String, toml::Value)> {
608        let value: toml::Value = toml::from_str(
609            r#"
610# Whether this rule is enabled (opt-in, disabled by default)
611enabled = false
612# Minimum heading level to include
613min-level = 2
614# Maximum heading level to include
615max-level = 4
616# Whether TOC order must match document order
617enforce-order = true
618# Indentation per nesting level (defaults to MD007's indent value)
619indent = 2
620"#,
621        )
622        .ok()?;
623        Some(("MD073".to_string(), value))
624    }
625
626    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
627    where
628        Self: Sized,
629    {
630        let mut rule = MD073TocValidation::default();
631        let mut indent_from_md073 = false;
632
633        if let Some(rule_config) = config.rules.get("MD073") {
634            // Parse enabled (opt-in rule, defaults to false)
635            if let Some(enabled) = rule_config.values.get("enabled").and_then(|v| v.as_bool()) {
636                rule.enabled = enabled;
637            }
638
639            // Parse min-level
640            if let Some(min_level) = rule_config.values.get("min-level").and_then(|v| v.as_integer()) {
641                rule.min_level = (min_level.clamp(1, 6)) as u8;
642            }
643
644            // Parse max-level
645            if let Some(max_level) = rule_config.values.get("max-level").and_then(|v| v.as_integer()) {
646                rule.max_level = (max_level.clamp(1, 6)) as u8;
647            }
648
649            // Parse enforce-order
650            if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(|v| v.as_bool()) {
651                rule.enforce_order = enforce_order;
652            }
653
654            // Parse indent (MD073-specific override)
655            if let Some(indent) = rule_config.values.get("indent").and_then(|v| v.as_integer()) {
656                rule.indent = (indent.clamp(1, 8)) as usize;
657                indent_from_md073 = true;
658            }
659        }
660
661        // If indent not explicitly set in MD073, read from MD007 config
662        if !indent_from_md073
663            && let Some(md007_config) = config.rules.get("MD007")
664            && let Some(indent) = md007_config.values.get("indent").and_then(|v| v.as_integer())
665        {
666            rule.indent = (indent.clamp(1, 8)) as usize;
667        }
668
669        Box::new(rule)
670    }
671}
672
673#[cfg(test)]
674mod tests {
675    use super::*;
676    use crate::config::MarkdownFlavor;
677
678    fn create_ctx(content: &str) -> LintContext<'_> {
679        LintContext::new(content, MarkdownFlavor::Standard, None)
680    }
681
682    /// Create rule with enabled=true for tests that call check() directly
683    fn create_enabled_rule() -> MD073TocValidation {
684        MD073TocValidation {
685            enabled: true,
686            ..MD073TocValidation::default()
687        }
688    }
689
690    // ========== Detection Tests ==========
691
692    #[test]
693    fn test_detect_markers_basic() {
694        let rule = MD073TocValidation::new();
695        let content = r#"# Title
696
697<!-- toc -->
698
699- [Heading 1](#heading-1)
700
701<!-- tocstop -->
702
703## Heading 1
704
705Content here.
706"#;
707        let ctx = create_ctx(content);
708        let region = rule.detect_by_markers(&ctx);
709        assert!(region.is_some());
710        let region = region.unwrap();
711        // Verify region boundaries are detected correctly
712        assert_eq!(region.start_line, 4);
713        assert_eq!(region.end_line, 6);
714    }
715
716    #[test]
717    fn test_detect_markers_variations() {
718        let rule = MD073TocValidation::new();
719
720        // Test <!--toc--> (no spaces)
721        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
722        let ctx1 = create_ctx(content1);
723        assert!(rule.detect_by_markers(&ctx1).is_some());
724
725        // Test <!-- TOC --> (uppercase)
726        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
727        let ctx2 = create_ctx(content2);
728        assert!(rule.detect_by_markers(&ctx2).is_some());
729
730        // Test <!-- /toc --> (alternative stop marker)
731        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
732        let ctx3 = create_ctx(content3);
733        assert!(rule.detect_by_markers(&ctx3).is_some());
734    }
735
736    #[test]
737    fn test_no_toc_region() {
738        let rule = MD073TocValidation::new();
739        let content = r#"# Title
740
741## Heading 1
742
743Content here.
744
745## Heading 2
746
747More content.
748"#;
749        let ctx = create_ctx(content);
750        let region = rule.detect_toc_region(&ctx);
751        assert!(region.is_none());
752    }
753
754    // ========== Validation Tests ==========
755
756    #[test]
757    fn test_toc_matches_headings() {
758        let rule = create_enabled_rule();
759        let content = r#"# Title
760
761<!-- toc -->
762
763- [Heading 1](#heading-1)
764- [Heading 2](#heading-2)
765
766<!-- tocstop -->
767
768## Heading 1
769
770Content.
771
772## Heading 2
773
774More content.
775"#;
776        let ctx = create_ctx(content);
777        let result = rule.check(&ctx).unwrap();
778        assert!(result.is_empty(), "Expected no warnings for matching TOC");
779    }
780
781    #[test]
782    fn test_missing_entry() {
783        let rule = create_enabled_rule();
784        let content = r#"# Title
785
786<!-- toc -->
787
788- [Heading 1](#heading-1)
789
790<!-- tocstop -->
791
792## Heading 1
793
794Content.
795
796## Heading 2
797
798New heading not in TOC.
799"#;
800        let ctx = create_ctx(content);
801        let result = rule.check(&ctx).unwrap();
802        assert_eq!(result.len(), 1);
803        assert!(result[0].message.contains("Missing entry"));
804        assert!(result[0].message.contains("Heading 2"));
805    }
806
807    #[test]
808    fn test_stale_entry() {
809        let rule = create_enabled_rule();
810        let content = r#"# Title
811
812<!-- toc -->
813
814- [Heading 1](#heading-1)
815- [Deleted Heading](#deleted-heading)
816
817<!-- tocstop -->
818
819## Heading 1
820
821Content.
822"#;
823        let ctx = create_ctx(content);
824        let result = rule.check(&ctx).unwrap();
825        assert_eq!(result.len(), 1);
826        assert!(result[0].message.contains("Stale entry"));
827        assert!(result[0].message.contains("Deleted Heading"));
828    }
829
830    #[test]
831    fn test_text_mismatch() {
832        let rule = create_enabled_rule();
833        let content = r#"# Title
834
835<!-- toc -->
836
837- [Old Name](#heading-1)
838
839<!-- tocstop -->
840
841## Heading 1
842
843Content.
844"#;
845        let ctx = create_ctx(content);
846        let result = rule.check(&ctx).unwrap();
847        assert_eq!(result.len(), 1);
848        assert!(result[0].message.contains("Text mismatch"));
849    }
850
851    // ========== Level Filtering Tests ==========
852
853    #[test]
854    fn test_min_level_excludes_h1() {
855        let mut rule = MD073TocValidation::new();
856        rule.min_level = 2;
857
858        let content = r#"<!-- toc -->
859
860<!-- tocstop -->
861
862# Should Be Excluded
863
864## Should Be Included
865
866Content.
867"#;
868        let ctx = create_ctx(content);
869        let region = rule.detect_toc_region(&ctx).unwrap();
870        let expected = rule.build_expected_toc(&ctx, &region);
871
872        assert_eq!(expected.len(), 1);
873        assert_eq!(expected[0].text, "Should Be Included");
874    }
875
876    #[test]
877    fn test_max_level_excludes_h5_h6() {
878        let mut rule = MD073TocValidation::new();
879        rule.max_level = 4;
880
881        let content = r#"<!-- toc -->
882
883<!-- tocstop -->
884
885## Level 2
886
887### Level 3
888
889#### Level 4
890
891##### Level 5 Should Be Excluded
892
893###### Level 6 Should Be Excluded
894"#;
895        let ctx = create_ctx(content);
896        let region = rule.detect_toc_region(&ctx).unwrap();
897        let expected = rule.build_expected_toc(&ctx, &region);
898
899        assert_eq!(expected.len(), 3);
900        assert!(expected.iter().all(|e| e.level <= 4));
901    }
902
903    // ========== Fix Tests ==========
904
905    #[test]
906    fn test_fix_adds_missing_entry() {
907        let rule = MD073TocValidation::new();
908        let content = r#"# Title
909
910<!-- toc -->
911
912- [Heading 1](#heading-1)
913
914<!-- tocstop -->
915
916## Heading 1
917
918Content.
919
920## Heading 2
921
922New heading.
923"#;
924        let ctx = create_ctx(content);
925        let fixed = rule.fix(&ctx).unwrap();
926        assert!(fixed.contains("- [Heading 2](#heading-2)"));
927    }
928
929    #[test]
930    fn test_fix_removes_stale_entry() {
931        let rule = MD073TocValidation::new();
932        let content = r#"# Title
933
934<!-- toc -->
935
936- [Heading 1](#heading-1)
937- [Deleted](#deleted)
938
939<!-- tocstop -->
940
941## Heading 1
942
943Content.
944"#;
945        let ctx = create_ctx(content);
946        let fixed = rule.fix(&ctx).unwrap();
947        assert!(fixed.contains("- [Heading 1](#heading-1)"));
948        assert!(!fixed.contains("Deleted"));
949    }
950
951    #[test]
952    fn test_fix_idempotent() {
953        let rule = MD073TocValidation::new();
954        let content = r#"# Title
955
956<!-- toc -->
957
958- [Heading 1](#heading-1)
959- [Heading 2](#heading-2)
960
961<!-- tocstop -->
962
963## Heading 1
964
965Content.
966
967## Heading 2
968
969More.
970"#;
971        let ctx = create_ctx(content);
972        let fixed1 = rule.fix(&ctx).unwrap();
973        let ctx2 = create_ctx(&fixed1);
974        let fixed2 = rule.fix(&ctx2).unwrap();
975
976        // Second fix should produce same output
977        assert_eq!(fixed1, fixed2);
978    }
979
980    #[test]
981    fn test_fix_preserves_markers() {
982        let rule = MD073TocValidation::new();
983        let content = r#"# Title
984
985<!-- toc -->
986
987Old TOC content.
988
989<!-- tocstop -->
990
991## New Heading
992
993Content.
994"#;
995        let ctx = create_ctx(content);
996        let fixed = rule.fix(&ctx).unwrap();
997
998        // Markers should still be present
999        assert!(fixed.contains("<!-- toc -->"));
1000        assert!(fixed.contains("<!-- tocstop -->"));
1001        // New content should be generated
1002        assert!(fixed.contains("- [New Heading](#new-heading)"));
1003    }
1004
1005    #[test]
1006    fn test_fix_requires_markers() {
1007        let rule = create_enabled_rule();
1008
1009        // Document without markers - no TOC detected, no changes
1010        let content_no_markers = r#"# Title
1011
1012## Heading 1
1013
1014Content.
1015"#;
1016        let ctx = create_ctx(content_no_markers);
1017        let fixed = rule.fix(&ctx).unwrap();
1018        assert_eq!(fixed, content_no_markers);
1019
1020        // Document with markers - TOC detected and fixed
1021        let content_markers = r#"# Title
1022
1023<!-- toc -->
1024
1025- [Old Entry](#old-entry)
1026
1027<!-- tocstop -->
1028
1029## Heading 1
1030
1031Content.
1032"#;
1033        let ctx = create_ctx(content_markers);
1034        let fixed = rule.fix(&ctx).unwrap();
1035        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1036        assert!(!fixed.contains("Old Entry"));
1037    }
1038
1039    // ========== Anchor Tests ==========
1040
1041    #[test]
1042    fn test_duplicate_heading_anchors() {
1043        let rule = MD073TocValidation::new();
1044        let content = r#"# Title
1045
1046<!-- toc -->
1047
1048<!-- tocstop -->
1049
1050## Duplicate
1051
1052Content.
1053
1054## Duplicate
1055
1056More content.
1057
1058## Duplicate
1059
1060Even more.
1061"#;
1062        let ctx = create_ctx(content);
1063        let region = rule.detect_toc_region(&ctx).unwrap();
1064        let expected = rule.build_expected_toc(&ctx, &region);
1065
1066        assert_eq!(expected.len(), 3);
1067        assert_eq!(expected[0].anchor, "duplicate");
1068        assert_eq!(expected[1].anchor, "duplicate-1");
1069        assert_eq!(expected[2].anchor, "duplicate-2");
1070    }
1071
1072    // ========== Edge Cases ==========
1073
1074    #[test]
1075    fn test_headings_in_code_blocks_ignored() {
1076        let rule = create_enabled_rule();
1077        let content = r#"# Title
1078
1079<!-- toc -->
1080
1081- [Real Heading](#real-heading)
1082
1083<!-- tocstop -->
1084
1085## Real Heading
1086
1087```markdown
1088## Fake Heading In Code
1089```
1090
1091Content.
1092"#;
1093        let ctx = create_ctx(content);
1094        let result = rule.check(&ctx).unwrap();
1095        assert!(result.is_empty(), "Should not report fake heading in code block");
1096    }
1097
1098    #[test]
1099    fn test_empty_toc_region() {
1100        let rule = create_enabled_rule();
1101        let content = r#"# Title
1102
1103<!-- toc -->
1104<!-- tocstop -->
1105
1106## Heading 1
1107
1108Content.
1109"#;
1110        let ctx = create_ctx(content);
1111        let result = rule.check(&ctx).unwrap();
1112        assert_eq!(result.len(), 1);
1113        assert!(result[0].message.contains("Missing entry"));
1114    }
1115
1116    #[test]
1117    fn test_nested_indentation() {
1118        let rule = create_enabled_rule();
1119
1120        let content = r#"<!-- toc -->
1121
1122<!-- tocstop -->
1123
1124## Level 2
1125
1126### Level 3
1127
1128#### Level 4
1129
1130## Another Level 2
1131"#;
1132        let ctx = create_ctx(content);
1133        let region = rule.detect_toc_region(&ctx).unwrap();
1134        let expected = rule.build_expected_toc(&ctx, &region);
1135        let toc = rule.generate_toc(&expected);
1136
1137        // Check indentation (always nested)
1138        assert!(toc.contains("- [Level 2](#level-2)"));
1139        assert!(toc.contains("  - [Level 3](#level-3)"));
1140        assert!(toc.contains("    - [Level 4](#level-4)"));
1141        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1142    }
1143
1144    // ========== Indentation Mismatch Tests ==========
1145
1146    #[test]
1147    fn test_indentation_mismatch_detected() {
1148        let rule = create_enabled_rule();
1149        // TOC entries are all at same indentation level, but headings have different levels
1150        let content = r#"<!-- toc -->
1151- [Hello](#hello)
1152- [Another](#another)
1153- [Heading](#heading)
1154<!-- tocstop -->
1155
1156## Hello
1157
1158### Another
1159
1160## Heading
1161"#;
1162        let ctx = create_ctx(content);
1163        let result = rule.check(&ctx).unwrap();
1164        // Should detect indentation mismatch - "Another" is level 3 but has no indent
1165        assert_eq!(result.len(), 1, "Should report indentation mismatch: {result:?}");
1166        assert!(
1167            result[0].message.contains("Indentation mismatch"),
1168            "Message should mention indentation: {}",
1169            result[0].message
1170        );
1171        assert!(
1172            result[0].message.contains("Another"),
1173            "Message should mention the entry: {}",
1174            result[0].message
1175        );
1176    }
1177
1178    #[test]
1179    fn test_indentation_mismatch_fixed() {
1180        let rule = create_enabled_rule();
1181        // TOC entries are all at same indentation level, but headings have different levels
1182        let content = r#"<!-- toc -->
1183- [Hello](#hello)
1184- [Another](#another)
1185- [Heading](#heading)
1186<!-- tocstop -->
1187
1188## Hello
1189
1190### Another
1191
1192## Heading
1193"#;
1194        let ctx = create_ctx(content);
1195        let fixed = rule.fix(&ctx).unwrap();
1196        // After fix, "Another" should be indented
1197        assert!(fixed.contains("- [Hello](#hello)"));
1198        assert!(fixed.contains("  - [Another](#another)")); // Indented with 2 spaces
1199        assert!(fixed.contains("- [Heading](#heading)"));
1200    }
1201
1202    #[test]
1203    fn test_no_indentation_mismatch_when_correct() {
1204        let rule = create_enabled_rule();
1205        // TOC has correct indentation
1206        let content = r#"<!-- toc -->
1207- [Hello](#hello)
1208  - [Another](#another)
1209- [Heading](#heading)
1210<!-- tocstop -->
1211
1212## Hello
1213
1214### Another
1215
1216## Heading
1217"#;
1218        let ctx = create_ctx(content);
1219        let result = rule.check(&ctx).unwrap();
1220        // Should not report any issues - indentation is correct
1221        assert!(result.is_empty(), "Should not report issues: {result:?}");
1222    }
1223
1224    // ========== Order Mismatch Tests ==========
1225
1226    #[test]
1227    fn test_order_mismatch_detected() {
1228        let rule = create_enabled_rule();
1229        let content = r#"# Title
1230
1231<!-- toc -->
1232
1233- [Section B](#section-b)
1234- [Section A](#section-a)
1235
1236<!-- tocstop -->
1237
1238## Section A
1239
1240Content A.
1241
1242## Section B
1243
1244Content B.
1245"#;
1246        let ctx = create_ctx(content);
1247        let result = rule.check(&ctx).unwrap();
1248        // Should detect order mismatch - Section B appears before Section A in TOC
1249        // but Section A comes first in document
1250        assert!(!result.is_empty(), "Should detect order mismatch");
1251    }
1252
1253    #[test]
1254    fn test_order_mismatch_ignored_when_disabled() {
1255        let mut rule = create_enabled_rule();
1256        rule.enforce_order = false;
1257        let content = r#"# Title
1258
1259<!-- toc -->
1260
1261- [Section B](#section-b)
1262- [Section A](#section-a)
1263
1264<!-- tocstop -->
1265
1266## Section A
1267
1268Content A.
1269
1270## Section B
1271
1272Content B.
1273"#;
1274        let ctx = create_ctx(content);
1275        let result = rule.check(&ctx).unwrap();
1276        // With enforce_order=false, order mismatches should be ignored
1277        assert!(result.is_empty(), "Should not report order mismatch when disabled");
1278    }
1279
1280    // ========== Unicode and Special Characters Tests ==========
1281
1282    #[test]
1283    fn test_unicode_headings() {
1284        let rule = create_enabled_rule();
1285        let content = r#"# Title
1286
1287<!-- toc -->
1288
1289- [日本語の見出し](#日本語の見出し)
1290- [Émojis 🎉](#émojis-)
1291
1292<!-- tocstop -->
1293
1294## 日本語の見出し
1295
1296Japanese content.
1297
1298## Émojis 🎉
1299
1300Content with emojis.
1301"#;
1302        let ctx = create_ctx(content);
1303        let result = rule.check(&ctx).unwrap();
1304        // Should handle unicode correctly
1305        assert!(result.is_empty(), "Should handle unicode headings");
1306    }
1307
1308    #[test]
1309    fn test_special_characters_in_headings() {
1310        let rule = create_enabled_rule();
1311        let content = r#"# Title
1312
1313<!-- toc -->
1314
1315- [What's New?](#whats-new)
1316- [C++ Guide](#c-guide)
1317
1318<!-- tocstop -->
1319
1320## What's New?
1321
1322News content.
1323
1324## C++ Guide
1325
1326C++ content.
1327"#;
1328        let ctx = create_ctx(content);
1329        let result = rule.check(&ctx).unwrap();
1330        assert!(result.is_empty(), "Should handle special characters");
1331    }
1332
1333    #[test]
1334    fn test_code_spans_in_headings() {
1335        let rule = create_enabled_rule();
1336        let content = r#"# Title
1337
1338<!-- toc -->
1339
1340- [`check [PATHS...]`](#check-paths)
1341
1342<!-- tocstop -->
1343
1344## `check [PATHS...]`
1345
1346Command documentation.
1347"#;
1348        let ctx = create_ctx(content);
1349        let result = rule.check(&ctx).unwrap();
1350        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
1351    }
1352
1353    // ========== Config Tests ==========
1354
1355    #[test]
1356    fn test_from_config_defaults() {
1357        let config = crate::config::Config::default();
1358        let rule = MD073TocValidation::from_config(&config);
1359        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1360
1361        assert_eq!(rule.min_level, 2);
1362        assert_eq!(rule.max_level, 4);
1363        assert!(rule.enforce_order);
1364        assert_eq!(rule.indent, 2);
1365    }
1366
1367    #[test]
1368    fn test_indent_from_md007_config() {
1369        use crate::config::{Config, RuleConfig};
1370        use std::collections::BTreeMap;
1371
1372        let mut config = Config::default();
1373
1374        // Set MD007 indent to 4
1375        let mut md007_values = BTreeMap::new();
1376        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1377        config.rules.insert(
1378            "MD007".to_string(),
1379            RuleConfig {
1380                severity: None,
1381                values: md007_values,
1382            },
1383        );
1384
1385        let rule = MD073TocValidation::from_config(&config);
1386        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1387
1388        assert_eq!(rule.indent, 4, "Should read indent from MD007 config");
1389    }
1390
1391    #[test]
1392    fn test_indent_md073_overrides_md007() {
1393        use crate::config::{Config, RuleConfig};
1394        use std::collections::BTreeMap;
1395
1396        let mut config = Config::default();
1397
1398        // Set MD007 indent to 4
1399        let mut md007_values = BTreeMap::new();
1400        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1401        config.rules.insert(
1402            "MD007".to_string(),
1403            RuleConfig {
1404                severity: None,
1405                values: md007_values,
1406            },
1407        );
1408
1409        // Set MD073 indent to 3 (should override MD007)
1410        let mut md073_values = BTreeMap::new();
1411        md073_values.insert("enabled".to_string(), toml::Value::Boolean(true));
1412        md073_values.insert("indent".to_string(), toml::Value::Integer(3));
1413        config.rules.insert(
1414            "MD073".to_string(),
1415            RuleConfig {
1416                severity: None,
1417                values: md073_values,
1418            },
1419        );
1420
1421        let rule = MD073TocValidation::from_config(&config);
1422        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1423
1424        assert_eq!(rule.indent, 3, "MD073 indent should override MD007");
1425    }
1426
1427    #[test]
1428    fn test_generate_toc_with_4_space_indent() {
1429        let mut rule = create_enabled_rule();
1430        rule.indent = 4;
1431
1432        let content = r#"<!-- toc -->
1433
1434<!-- tocstop -->
1435
1436## Level 2
1437
1438### Level 3
1439
1440#### Level 4
1441
1442## Another Level 2
1443"#;
1444        let ctx = create_ctx(content);
1445        let region = rule.detect_toc_region(&ctx).unwrap();
1446        let expected = rule.build_expected_toc(&ctx, &region);
1447        let toc = rule.generate_toc(&expected);
1448
1449        // With 4-space indent:
1450        // Level 2 = 0 spaces (base level)
1451        // Level 3 = 4 spaces
1452        // Level 4 = 8 spaces
1453        assert!(toc.contains("- [Level 2](#level-2)"), "Level 2 should have no indent");
1454        assert!(
1455            toc.contains("    - [Level 3](#level-3)"),
1456            "Level 3 should have 4-space indent"
1457        );
1458        assert!(
1459            toc.contains("        - [Level 4](#level-4)"),
1460            "Level 4 should have 8-space indent"
1461        );
1462        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1463    }
1464
1465    #[test]
1466    fn test_validate_toc_with_4_space_indent() {
1467        let mut rule = create_enabled_rule();
1468        rule.indent = 4;
1469
1470        // TOC with correct 4-space indentation
1471        let content = r#"<!-- toc -->
1472- [Hello](#hello)
1473    - [Another](#another)
1474- [Heading](#heading)
1475<!-- tocstop -->
1476
1477## Hello
1478
1479### Another
1480
1481## Heading
1482"#;
1483        let ctx = create_ctx(content);
1484        let result = rule.check(&ctx).unwrap();
1485        assert!(
1486            result.is_empty(),
1487            "Should accept 4-space indent when configured: {result:?}"
1488        );
1489    }
1490
1491    #[test]
1492    fn test_validate_toc_wrong_indent_with_4_space_config() {
1493        let mut rule = create_enabled_rule();
1494        rule.indent = 4;
1495
1496        // TOC with 2-space indentation (wrong when 4-space is configured)
1497        let content = r#"<!-- toc -->
1498- [Hello](#hello)
1499  - [Another](#another)
1500- [Heading](#heading)
1501<!-- tocstop -->
1502
1503## Hello
1504
1505### Another
1506
1507## Heading
1508"#;
1509        let ctx = create_ctx(content);
1510        let result = rule.check(&ctx).unwrap();
1511        assert_eq!(result.len(), 1, "Should detect wrong indent");
1512        assert!(
1513            result[0].message.contains("Indentation mismatch"),
1514            "Should report indentation mismatch: {}",
1515            result[0].message
1516        );
1517        assert!(
1518            result[0].message.contains("expected 4 spaces"),
1519            "Should mention expected 4 spaces: {}",
1520            result[0].message
1521        );
1522    }
1523
1524    // ========== Markdown Stripping Tests ==========
1525
1526    #[test]
1527    fn test_strip_markdown_formatting_link() {
1528        let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1529        assert_eq!(result, "Tool: terminal");
1530    }
1531
1532    #[test]
1533    fn test_strip_markdown_formatting_bold() {
1534        let result = strip_markdown_formatting("This is **bold** text");
1535        assert_eq!(result, "This is bold text");
1536
1537        let result = strip_markdown_formatting("This is __bold__ text");
1538        assert_eq!(result, "This is bold text");
1539    }
1540
1541    #[test]
1542    fn test_strip_markdown_formatting_italic() {
1543        let result = strip_markdown_formatting("This is *italic* text");
1544        assert_eq!(result, "This is italic text");
1545
1546        let result = strip_markdown_formatting("This is _italic_ text");
1547        assert_eq!(result, "This is italic text");
1548    }
1549
1550    #[test]
1551    fn test_strip_markdown_formatting_code_span() {
1552        let result = strip_markdown_formatting("Use the `format` function");
1553        assert_eq!(result, "Use the format function");
1554    }
1555
1556    #[test]
1557    fn test_strip_markdown_formatting_image() {
1558        let result = strip_markdown_formatting("See ![logo](image.png) for details");
1559        assert_eq!(result, "See logo for details");
1560    }
1561
1562    #[test]
1563    fn test_strip_markdown_formatting_reference_link() {
1564        let result = strip_markdown_formatting("See [documentation][docs] for details");
1565        assert_eq!(result, "See documentation for details");
1566    }
1567
1568    #[test]
1569    fn test_strip_markdown_formatting_combined() {
1570        // Link is stripped first, leaving bold, then bold is stripped
1571        let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1572        assert_eq!(result, "Tool: terminal");
1573    }
1574
1575    #[test]
1576    fn test_toc_with_link_in_heading_matches_stripped_text() {
1577        let rule = create_enabled_rule();
1578
1579        // TOC entry text matches the stripped heading text
1580        let content = r#"# Title
1581
1582<!-- toc -->
1583
1584- [Tool: terminal](#tool-terminal)
1585
1586<!-- tocstop -->
1587
1588## Tool: [terminal](https://example.com)
1589
1590Content here.
1591"#;
1592        let ctx = create_ctx(content);
1593        let result = rule.check(&ctx).unwrap();
1594        assert!(
1595            result.is_empty(),
1596            "Stripped heading text should match TOC entry: {result:?}"
1597        );
1598    }
1599
1600    #[test]
1601    fn test_toc_with_simplified_text_still_mismatches() {
1602        let rule = create_enabled_rule();
1603
1604        // TOC entry "terminal" does NOT match stripped heading "Tool: terminal"
1605        let content = r#"# Title
1606
1607<!-- toc -->
1608
1609- [terminal](#tool-terminal)
1610
1611<!-- tocstop -->
1612
1613## Tool: [terminal](https://example.com)
1614
1615Content here.
1616"#;
1617        let ctx = create_ctx(content);
1618        let result = rule.check(&ctx).unwrap();
1619        assert_eq!(result.len(), 1, "Should report text mismatch");
1620        assert!(result[0].message.contains("Text mismatch"));
1621    }
1622
1623    #[test]
1624    fn test_fix_generates_stripped_toc_entries() {
1625        let rule = MD073TocValidation::new();
1626        let content = r#"# Title
1627
1628<!-- toc -->
1629
1630<!-- tocstop -->
1631
1632## Tool: [busybox](https://www.busybox.net/)
1633
1634Content.
1635
1636## Tool: [mount](https://en.wikipedia.org/wiki/Mount)
1637
1638More content.
1639"#;
1640        let ctx = create_ctx(content);
1641        let fixed = rule.fix(&ctx).unwrap();
1642
1643        // Generated TOC should have stripped text (links removed)
1644        assert!(
1645            fixed.contains("- [Tool: busybox](#tool-busybox)"),
1646            "TOC entry should have stripped link text"
1647        );
1648        assert!(
1649            fixed.contains("- [Tool: mount](#tool-mount)"),
1650            "TOC entry should have stripped link text"
1651        );
1652        // TOC entries should NOT contain the URL (the actual headings in the document still will)
1653        // Check only within the TOC region (between toc markers)
1654        let toc_start = fixed.find("<!-- toc -->").unwrap();
1655        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
1656        let toc_content = &fixed[toc_start..toc_end];
1657        assert!(
1658            !toc_content.contains("busybox.net"),
1659            "TOC should not contain URLs: {toc_content}"
1660        );
1661        assert!(
1662            !toc_content.contains("wikipedia.org"),
1663            "TOC should not contain URLs: {toc_content}"
1664        );
1665    }
1666
1667    #[test]
1668    fn test_fix_with_bold_in_heading() {
1669        let rule = MD073TocValidation::new();
1670        let content = r#"# Title
1671
1672<!-- toc -->
1673
1674<!-- tocstop -->
1675
1676## **Important** Section
1677
1678Content.
1679"#;
1680        let ctx = create_ctx(content);
1681        let fixed = rule.fix(&ctx).unwrap();
1682
1683        // Generated TOC should have stripped text (bold markers removed)
1684        assert!(fixed.contains("- [Important Section](#important-section)"));
1685    }
1686
1687    #[test]
1688    fn test_fix_with_code_in_heading() {
1689        let rule = MD073TocValidation::new();
1690        let content = r#"# Title
1691
1692<!-- toc -->
1693
1694<!-- tocstop -->
1695
1696## Using `async` Functions
1697
1698Content.
1699"#;
1700        let ctx = create_ctx(content);
1701        let fixed = rule.fix(&ctx).unwrap();
1702
1703        // Generated TOC should have stripped text (backticks removed)
1704        assert!(fixed.contains("- [Using async Functions](#using-async-functions)"));
1705    }
1706
1707    // ========== Custom Anchor Tests ==========
1708
1709    #[test]
1710    fn test_custom_anchor_id_respected() {
1711        let rule = create_enabled_rule();
1712        let content = r#"# Title
1713
1714<!-- toc -->
1715
1716- [My Section](#my-custom-anchor)
1717
1718<!-- tocstop -->
1719
1720## My Section {#my-custom-anchor}
1721
1722Content here.
1723"#;
1724        let ctx = create_ctx(content);
1725        let result = rule.check(&ctx).unwrap();
1726        assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
1727    }
1728
1729    #[test]
1730    fn test_custom_anchor_id_in_generated_toc() {
1731        let rule = create_enabled_rule();
1732        let content = r#"# Title
1733
1734<!-- toc -->
1735
1736<!-- tocstop -->
1737
1738## First Section {#custom-first}
1739
1740Content.
1741
1742## Second Section {#another-custom}
1743
1744More content.
1745"#;
1746        let ctx = create_ctx(content);
1747        let fixed = rule.fix(&ctx).unwrap();
1748        assert!(fixed.contains("- [First Section](#custom-first)"));
1749        assert!(fixed.contains("- [Second Section](#another-custom)"));
1750    }
1751
1752    #[test]
1753    fn test_mixed_custom_and_generated_anchors() {
1754        let rule = create_enabled_rule();
1755        let content = r#"# Title
1756
1757<!-- toc -->
1758
1759- [Custom Section](#my-id)
1760- [Normal Section](#normal-section)
1761
1762<!-- tocstop -->
1763
1764## Custom Section {#my-id}
1765
1766Content.
1767
1768## Normal Section
1769
1770More content.
1771"#;
1772        let ctx = create_ctx(content);
1773        let result = rule.check(&ctx).unwrap();
1774        assert!(result.is_empty(), "Should handle mixed custom and generated anchors");
1775    }
1776
1777    // ========== Anchor Generation Tests ==========
1778
1779    #[test]
1780    fn test_github_anchor_style() {
1781        let rule = create_enabled_rule();
1782
1783        let content = r#"<!-- toc -->
1784
1785<!-- tocstop -->
1786
1787## Test_With_Underscores
1788
1789Content.
1790"#;
1791        let ctx = create_ctx(content);
1792        let region = rule.detect_toc_region(&ctx).unwrap();
1793        let expected = rule.build_expected_toc(&ctx, &region);
1794
1795        // GitHub-style anchors preserve underscores
1796        assert_eq!(expected[0].anchor, "test_with_underscores");
1797    }
1798
1799    // ========== Stress Tests ==========
1800
1801    #[test]
1802    fn test_stress_many_headings() {
1803        let rule = create_enabled_rule();
1804
1805        // Generate a document with 150 headings
1806        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1807
1808        for i in 1..=150 {
1809            content.push_str(&format!("## Heading Number {i}\n\nContent for section {i}.\n\n"));
1810        }
1811
1812        let ctx = create_ctx(&content);
1813
1814        // Should not panic or timeout
1815        let result = rule.check(&ctx).unwrap();
1816
1817        // Should report missing entries for all 150 headings
1818        assert_eq!(result.len(), 1, "Should report single warning for TOC");
1819        assert!(result[0].message.contains("Missing entry"));
1820
1821        // Fix should generate TOC with 150 entries
1822        let fixed = rule.fix(&ctx).unwrap();
1823        assert!(fixed.contains("- [Heading Number 1](#heading-number-1)"));
1824        assert!(fixed.contains("- [Heading Number 100](#heading-number-100)"));
1825        assert!(fixed.contains("- [Heading Number 150](#heading-number-150)"));
1826    }
1827
1828    #[test]
1829    fn test_stress_deeply_nested() {
1830        let rule = create_enabled_rule();
1831        let content = r#"# Title
1832
1833<!-- toc -->
1834
1835<!-- tocstop -->
1836
1837## Level 2 A
1838
1839### Level 3 A
1840
1841#### Level 4 A
1842
1843## Level 2 B
1844
1845### Level 3 B
1846
1847#### Level 4 B
1848
1849## Level 2 C
1850
1851### Level 3 C
1852
1853#### Level 4 C
1854
1855## Level 2 D
1856
1857### Level 3 D
1858
1859#### Level 4 D
1860"#;
1861        let ctx = create_ctx(content);
1862        let fixed = rule.fix(&ctx).unwrap();
1863
1864        // Check nested indentation is correct
1865        assert!(fixed.contains("- [Level 2 A](#level-2-a)"));
1866        assert!(fixed.contains("  - [Level 3 A](#level-3-a)"));
1867        assert!(fixed.contains("    - [Level 4 A](#level-4-a)"));
1868        assert!(fixed.contains("- [Level 2 D](#level-2-d)"));
1869        assert!(fixed.contains("  - [Level 3 D](#level-3-d)"));
1870        assert!(fixed.contains("    - [Level 4 D](#level-4-d)"));
1871    }
1872
1873    #[test]
1874    fn test_stress_many_duplicates() {
1875        let rule = create_enabled_rule();
1876
1877        // Generate 50 headings with the same text
1878        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1879        for _ in 0..50 {
1880            content.push_str("## FAQ\n\nContent.\n\n");
1881        }
1882
1883        let ctx = create_ctx(&content);
1884        let region = rule.detect_toc_region(&ctx).unwrap();
1885        let expected = rule.build_expected_toc(&ctx, &region);
1886
1887        // Should generate unique anchors for all 50
1888        assert_eq!(expected.len(), 50);
1889        assert_eq!(expected[0].anchor, "faq");
1890        assert_eq!(expected[1].anchor, "faq-1");
1891        assert_eq!(expected[49].anchor, "faq-49");
1892    }
1893}