Skip to main content

rumdl_lib/rules/
md073_toc_validation.rs

1//! MD073: Table of Contents validation rule
2//!
3//! Validates that TOC sections match the actual document headings.
4
5use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
/// Matches the TOC start marker `<!-- toc -->`.
///
/// The match is case-insensitive (`(?i)`) and whitespace around `toc` is
/// optional, so `<!--toc-->` and `<!-- TOC -->` are both accepted.
static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());

/// Matches the TOC stop marker: `<!-- tocstop -->` or `<!-- /toc -->`.
///
/// Case-insensitive, with optional whitespace inside the comment delimiters.
static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());

/// Matches a TOC list entry: `- [text](#anchor)` or `* [text](#anchor)`,
/// with optional leading whitespace for nested items.
///
/// Capture groups:
/// 1. leading whitespace (indentation)
/// 2. link text, which may contain one level of nested brackets such as
///    `[`check [PATHS...]`](#check-paths)`
/// 3. anchor, without the leading `#`
static TOC_ENTRY_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// Location of a marker-delimited TOC inside the document.
///
/// Line numbers are 1-indexed and describe the content *between* the
/// markers; the byte offsets delimit the same content in the source text.
#[derive(Clone, Debug)]
struct TocRegion {
    /// First content line (1-indexed), i.e. the line after the start marker
    start_line: usize,
    /// Last content line (1-indexed), i.e. the line before the stop marker
    end_line: usize,
    /// Byte offset at which the TOC content begins
    content_start: usize,
    /// Byte offset at which the TOC content ends
    content_end: usize,
}
36
/// One link parsed out of the TOC that is currently in the document.
#[derive(Clone, Debug)]
struct TocEntry {
    /// Link text as written in the TOC
    text: String,
    /// Link target fragment, stored without the leading `#`
    anchor: String,
}
45
/// A TOC entry derived from a heading in the document; this is what the
/// TOC *should* contain.
#[derive(Clone, Debug)]
struct ExpectedTocEntry {
    /// Line of the source heading (1-indexed)
    heading_line: usize,
    /// Level of the source heading (1-6)
    level: u8,
    /// Text of the source heading, used for display
    text: String,
    /// Anchor generated (or taken from a custom ID) for the heading
    anchor: String,
}
58
59/// Types of mismatches between actual and expected TOC
60#[derive(Debug)]
61enum TocMismatch {
62    /// Entry exists in TOC but heading doesn't exist
63    StaleEntry { entry: TocEntry },
64    /// Heading exists but no TOC entry for it
65    MissingEntry { expected: ExpectedTocEntry },
66    /// TOC entry text doesn't match heading text
67    TextMismatch {
68        entry: TocEntry,
69        expected: ExpectedTocEntry,
70    },
71    /// TOC entries are in wrong order
72    OrderMismatch { entry: TocEntry, expected_position: usize },
73}
74
// Regex patterns for stripping markdown formatting from heading text.
// In every pattern the capture group(s) hold the text that should survive.

/// Inline link `[text](url)`; group 1 is the link text.
static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
/// Reference link `[text][ref]` (the reference may be empty); group 1 is the link text.
static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
/// Image `![alt](url)`; group 1 is the (possibly empty) alt text.
static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
/// Code span delimited by single backticks; group 1 is the code.
static MARKDOWN_CODE_SPAN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`([^`]+)`").unwrap());
/// Bold written as `**text**`; group 1 is the text.
static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
/// Bold written as `__text__`; group 1 is the text.
static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
/// Italic written as `*text*`; group 1 is the text.
static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
// Match underscore italic at word boundaries (space or start/end)
// Handles: "_text_", " _text_ ", "start _text_", "_text_ end"
// Groups 1 and 3 are the boundary characters (consumed by the match and
// therefore re-emitted by the caller); group 2 is the italic text.
static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
87
88/// Strip markdown formatting from text, preserving plain text content.
89/// Used for TOC entry display text.
90///
91/// Examples:
92/// - `[terminal](url)` → `terminal`
93/// - `**bold**` → `bold`
94/// - `` `code` `` → `code`
95/// - `Tool: [terminal](url)` → `Tool: terminal`
96fn strip_markdown_formatting(text: &str) -> String {
97    let mut result = text.to_string();
98
99    // Strip images first (before links, since images use similar syntax)
100    result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
101
102    // Strip links: [text](url) → text
103    result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
104
105    // Strip reference links: [text][ref] → text
106    result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
107
108    // Strip code spans: `code` → code
109    result = MARKDOWN_CODE_SPAN.replace_all(&result, "$1").to_string();
110
111    // Strip bold (do double before single to handle nested)
112    result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
113    result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
114
115    // Strip italic
116    result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
117    // Underscore italic: preserve boundary chars, extract content
118    result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
119
120    result
121}
122
/// MD073: Table of Contents Validation
///
/// Checks that a marker-delimited Table of Contents
/// (`<!-- toc -->...<!-- tocstop -->`) agrees with the headings that
/// actually appear in the document.
///
/// Documents opt in by containing the markers:
/// ```markdown
/// <!-- toc -->
/// - [Section](#section)
/// <!-- tocstop -->
/// ```
///
/// ## Configuration
///
/// ```toml
/// [MD073]
/// # Enable the rule (opt-in, disabled by default)
/// enabled = true
/// # Minimum heading level to include (default: 2)
/// min-level = 2
/// # Maximum heading level to include (default: 4)
/// max-level = 4
/// # Whether TOC order must match document order (default: true)
/// enforce-order = true
/// ```
#[derive(Clone)]
pub struct MD073TocValidation {
    /// Opt-in switch; the rule does nothing unless this is `true` (default: `false`)
    enabled: bool,
    /// Shallowest heading level that belongs in the TOC
    min_level: u8,
    /// Deepest heading level that belongs in the TOC
    max_level: u8,
    /// When `true`, TOC entries must appear in document order
    enforce_order: bool,
}
159
160impl Default for MD073TocValidation {
161    fn default() -> Self {
162        Self {
163            enabled: false, // Disabled by default - opt-in rule
164            min_level: 2,
165            max_level: 4,
166            enforce_order: true,
167        }
168    }
169}
170
171impl std::fmt::Debug for MD073TocValidation {
172    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
173        f.debug_struct("MD073TocValidation")
174            .field("enabled", &self.enabled)
175            .field("min_level", &self.min_level)
176            .field("max_level", &self.max_level)
177            .field("enforce_order", &self.enforce_order)
178            .finish()
179    }
180}
181
182impl MD073TocValidation {
183    /// Create a new rule with default settings
184    pub fn new() -> Self {
185        Self::default()
186    }
187
188    /// Detect TOC region using markers
189    fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
190        let mut start_line = None;
191        let mut start_byte = None;
192
193        for (idx, line_info) in ctx.lines.iter().enumerate() {
194            let line_num = idx + 1;
195            let content = line_info.content(ctx.content);
196
197            // Skip if in code block or front matter
198            if line_info.in_code_block || line_info.in_front_matter {
199                continue;
200            }
201
202            // Look for start marker or stop marker
203            if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
204                // We have a start, now look for stop marker
205                if TOC_STOP_MARKER.is_match(content) {
206                    let end_line = line_num - 1;
207                    let content_end = line_info.byte_offset;
208
209                    // Handle case where there's no content between markers
210                    if end_line < s_line {
211                        return Some(TocRegion {
212                            start_line: s_line,
213                            end_line: s_line,
214                            content_start: s_byte,
215                            content_end: s_byte,
216                        });
217                    }
218
219                    return Some(TocRegion {
220                        start_line: s_line,
221                        end_line,
222                        content_start: s_byte,
223                        content_end,
224                    });
225                }
226            } else if TOC_START_MARKER.is_match(content) {
227                // TOC content starts on the next line
228                if idx + 1 < ctx.lines.len() {
229                    start_line = Some(line_num + 1);
230                    start_byte = Some(ctx.lines[idx + 1].byte_offset);
231                }
232            }
233        }
234
235        None
236    }
237
238    /// Detect TOC region using markers
239    fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
240        self.detect_by_markers(ctx)
241    }
242
243    /// Extract TOC entries from the detected region
244    fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
245        let mut entries = Vec::new();
246
247        for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
248            let line_info = &ctx.lines[idx];
249            let content = line_info.content(ctx.content);
250
251            if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
252                let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
253                let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
254
255                entries.push(TocEntry { text, anchor });
256            }
257        }
258
259        entries
260    }
261
262    /// Build expected TOC entries from document headings
263    fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
264        let mut entries = Vec::new();
265        let mut fragment_counts: HashMap<String, usize> = HashMap::new();
266
267        for (idx, line_info) in ctx.lines.iter().enumerate() {
268            let line_num = idx + 1;
269
270            // Skip headings before/within the TOC region
271            if line_num <= toc_region.end_line {
272                // Also skip the TOC heading itself for heading-based detection
273                continue;
274            }
275
276            // Skip code blocks, front matter, HTML blocks
277            if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
278                continue;
279            }
280
281            if let Some(heading) = &line_info.heading {
282                // Filter by min/max level
283                if heading.level < self.min_level || heading.level > self.max_level {
284                    continue;
285                }
286
287                // Use custom ID if available, otherwise generate GitHub-style anchor
288                let base_anchor = if let Some(custom_id) = &heading.custom_id {
289                    custom_id.clone()
290                } else {
291                    AnchorStyle::GitHub.generate_fragment(&heading.text)
292                };
293
294                // Handle duplicate anchors
295                let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
296                    let suffix = *count;
297                    *count += 1;
298                    format!("{base_anchor}-{suffix}")
299                } else {
300                    fragment_counts.insert(base_anchor.clone(), 1);
301                    base_anchor
302                };
303
304                entries.push(ExpectedTocEntry {
305                    heading_line: line_num,
306                    level: heading.level,
307                    text: heading.text.clone(),
308                    anchor,
309                });
310            }
311        }
312
313        entries
314    }
315
316    /// Compare actual TOC entries against expected and find mismatches
317    fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
318        let mut mismatches = Vec::new();
319
320        // Build a map of expected anchors
321        let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
322            expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
323
324        // Build a map of actual anchors
325        let actual_anchors: HashMap<&str, &TocEntry> = actual.iter().map(|e| (e.anchor.as_str(), e)).collect();
326
327        // Check for stale entries (in TOC but not in expected)
328        for entry in actual {
329            if !expected_anchors.contains_key(entry.anchor.as_str()) {
330                mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
331            }
332        }
333
334        // Check for missing entries (in expected but not in TOC)
335        for exp in expected {
336            if !actual_anchors.contains_key(exp.anchor.as_str()) {
337                mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
338            }
339        }
340
341        // Check for text mismatches (compare stripped versions)
342        for entry in actual {
343            if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
344                // Compare stripped text (removes markdown formatting like links, emphasis)
345                let actual_stripped = strip_markdown_formatting(entry.text.trim());
346                let expected_stripped = strip_markdown_formatting(exp.text.trim());
347                if actual_stripped != expected_stripped {
348                    mismatches.push(TocMismatch::TextMismatch {
349                        entry: entry.clone(),
350                        expected: (*exp).clone(),
351                    });
352                }
353            }
354        }
355
356        // Check order if enforce_order is enabled
357        if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
358            let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
359
360            // Find entries that exist in both but are out of order
361            let mut expected_idx = 0;
362            for entry in actual {
363                // Skip entries that don't exist in expected
364                if !expected_anchors.contains_key(entry.anchor.as_str()) {
365                    continue;
366                }
367
368                // Find where this anchor should be
369                while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
370                    expected_idx += 1;
371                }
372
373                if expected_idx >= expected_order.len() {
374                    // This entry is after where it should be
375                    let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
376                    // Only add order mismatch if not already reported as stale/text mismatch
377                    let already_reported = mismatches.iter().any(|m| match m {
378                        TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
379                        TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
380                        _ => false,
381                    });
382                    if !already_reported {
383                        mismatches.push(TocMismatch::OrderMismatch {
384                            entry: entry.clone(),
385                            expected_position: correct_pos + 1,
386                        });
387                    }
388                } else {
389                    expected_idx += 1;
390                }
391            }
392        }
393
394        mismatches
395    }
396
397    /// Generate a new TOC from expected entries (always uses nested indentation)
398    fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
399        if expected.is_empty() {
400            return String::new();
401        }
402
403        let mut result = String::new();
404        let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
405
406        for entry in expected {
407            let level_diff = entry.level.saturating_sub(base_level) as usize;
408            let indent = "  ".repeat(level_diff);
409
410            // Strip markdown formatting from heading text for clean TOC entries
411            let display_text = strip_markdown_formatting(&entry.text);
412            result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
413        }
414
415        result
416    }
417}
418
419impl Rule for MD073TocValidation {
420    fn name(&self) -> &'static str {
421        "MD073"
422    }
423
424    fn description(&self) -> &'static str {
425        "Table of Contents should match document headings"
426    }
427
428    fn should_skip(&self, ctx: &LintContext) -> bool {
429        // Skip if rule is disabled (opt-in rule)
430        if !self.enabled {
431            return true;
432        }
433
434        // Quick check: skip if no TOC markers
435        let has_toc_marker = ctx.content.contains("<!-- toc") || ctx.content.contains("<!--toc");
436        !has_toc_marker
437    }
438
439    fn check(&self, ctx: &LintContext) -> LintResult {
440        let mut warnings = Vec::new();
441
442        // Detect TOC region
443        let Some(region) = self.detect_toc_region(ctx) else {
444            // No TOC found - nothing to validate
445            return Ok(warnings);
446        };
447
448        // Extract actual TOC entries
449        let actual_entries = self.extract_toc_entries(ctx, &region);
450
451        // Build expected TOC from headings
452        let expected_entries = self.build_expected_toc(ctx, &region);
453
454        // If no expected entries and no actual entries, nothing to validate
455        if expected_entries.is_empty() && actual_entries.is_empty() {
456            return Ok(warnings);
457        }
458
459        // Validate
460        let mismatches = self.validate_toc(&actual_entries, &expected_entries);
461
462        if !mismatches.is_empty() {
463            // Generate a single warning at the TOC region with details
464            let mut details = Vec::new();
465
466            for mismatch in &mismatches {
467                match mismatch {
468                    TocMismatch::StaleEntry { entry } => {
469                        details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
470                    }
471                    TocMismatch::MissingEntry { expected } => {
472                        details.push(format!(
473                            "Missing entry: '{}' (line {})",
474                            expected.text, expected.heading_line
475                        ));
476                    }
477                    TocMismatch::TextMismatch { entry, expected } => {
478                        details.push(format!(
479                            "Text mismatch: TOC has '{}', heading is '{}'",
480                            entry.text, expected.text
481                        ));
482                    }
483                    TocMismatch::OrderMismatch {
484                        entry,
485                        expected_position,
486                    } => {
487                        details.push(format!(
488                            "Order mismatch: '{}' should be at position {}",
489                            entry.text, expected_position
490                        ));
491                    }
492                }
493            }
494
495            let message = format!(
496                "Table of Contents does not match document headings: {}",
497                details.join("; ")
498            );
499
500            // Generate fix: replace entire TOC content
501            let new_toc = self.generate_toc(&expected_entries);
502            let fix_range = region.content_start..region.content_end;
503
504            warnings.push(LintWarning {
505                rule_name: Some(self.name().to_string()),
506                message,
507                line: region.start_line,
508                column: 1,
509                end_line: region.end_line,
510                end_column: 1,
511                severity: Severity::Warning,
512                fix: Some(Fix {
513                    range: fix_range,
514                    replacement: new_toc,
515                }),
516            });
517        }
518
519        Ok(warnings)
520    }
521
522    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
523        // Detect TOC region
524        let Some(region) = self.detect_toc_region(ctx) else {
525            // No TOC found - return unchanged
526            return Ok(ctx.content.to_string());
527        };
528
529        // Build expected TOC from headings
530        let expected_entries = self.build_expected_toc(ctx, &region);
531
532        // Generate new TOC
533        let new_toc = self.generate_toc(&expected_entries);
534
535        // Replace the TOC content
536        let mut result = String::with_capacity(ctx.content.len());
537        result.push_str(&ctx.content[..region.content_start]);
538        result.push_str(&new_toc);
539        result.push_str(&ctx.content[region.content_end..]);
540
541        Ok(result)
542    }
543
544    fn category(&self) -> RuleCategory {
545        RuleCategory::Other
546    }
547
548    fn as_any(&self) -> &dyn std::any::Any {
549        self
550    }
551
552    fn default_config_section(&self) -> Option<(String, toml::Value)> {
553        let value: toml::Value = toml::from_str(
554            r#"
555# Whether this rule is enabled (opt-in, disabled by default)
556enabled = false
557# Minimum heading level to include
558min-level = 2
559# Maximum heading level to include
560max-level = 4
561# Whether TOC order must match document order
562enforce-order = true
563"#,
564        )
565        .ok()?;
566        Some(("MD073".to_string(), value))
567    }
568
569    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
570    where
571        Self: Sized,
572    {
573        let mut rule = MD073TocValidation::default();
574
575        if let Some(rule_config) = config.rules.get("MD073") {
576            // Parse enabled (opt-in rule, defaults to false)
577            if let Some(enabled) = rule_config.values.get("enabled").and_then(|v| v.as_bool()) {
578                rule.enabled = enabled;
579            }
580
581            // Parse min-level
582            if let Some(min_level) = rule_config.values.get("min-level").and_then(|v| v.as_integer()) {
583                rule.min_level = (min_level.clamp(1, 6)) as u8;
584            }
585
586            // Parse max-level
587            if let Some(max_level) = rule_config.values.get("max-level").and_then(|v| v.as_integer()) {
588                rule.max_level = (max_level.clamp(1, 6)) as u8;
589            }
590
591            // Parse enforce-order
592            if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(|v| v.as_bool()) {
593                rule.enforce_order = enforce_order;
594            }
595        }
596
597        Box::new(rule)
598    }
599}
600
601#[cfg(test)]
602mod tests {
603    use super::*;
604    use crate::config::MarkdownFlavor;
605
606    fn create_ctx(content: &str) -> LintContext<'_> {
607        LintContext::new(content, MarkdownFlavor::Standard, None)
608    }
609
610    /// Create rule with enabled=true for tests that call check() directly
611    fn create_enabled_rule() -> MD073TocValidation {
612        MD073TocValidation {
613            enabled: true,
614            ..MD073TocValidation::default()
615        }
616    }
617
618    // ========== Detection Tests ==========
619
620    #[test]
621    fn test_detect_markers_basic() {
622        let rule = MD073TocValidation::new();
623        let content = r#"# Title
624
625<!-- toc -->
626
627- [Heading 1](#heading-1)
628
629<!-- tocstop -->
630
631## Heading 1
632
633Content here.
634"#;
635        let ctx = create_ctx(content);
636        let region = rule.detect_by_markers(&ctx);
637        assert!(region.is_some());
638        let region = region.unwrap();
639        // Verify region boundaries are detected correctly
640        assert_eq!(region.start_line, 4);
641        assert_eq!(region.end_line, 6);
642    }
643
644    #[test]
645    fn test_detect_markers_variations() {
646        let rule = MD073TocValidation::new();
647
648        // Test <!--toc--> (no spaces)
649        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
650        let ctx1 = create_ctx(content1);
651        assert!(rule.detect_by_markers(&ctx1).is_some());
652
653        // Test <!-- TOC --> (uppercase)
654        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
655        let ctx2 = create_ctx(content2);
656        assert!(rule.detect_by_markers(&ctx2).is_some());
657
658        // Test <!-- /toc --> (alternative stop marker)
659        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
660        let ctx3 = create_ctx(content3);
661        assert!(rule.detect_by_markers(&ctx3).is_some());
662    }
663
664    #[test]
665    fn test_no_toc_region() {
666        let rule = MD073TocValidation::new();
667        let content = r#"# Title
668
669## Heading 1
670
671Content here.
672
673## Heading 2
674
675More content.
676"#;
677        let ctx = create_ctx(content);
678        let region = rule.detect_toc_region(&ctx);
679        assert!(region.is_none());
680    }
681
682    // ========== Validation Tests ==========
683
684    #[test]
685    fn test_toc_matches_headings() {
686        let rule = create_enabled_rule();
687        let content = r#"# Title
688
689<!-- toc -->
690
691- [Heading 1](#heading-1)
692- [Heading 2](#heading-2)
693
694<!-- tocstop -->
695
696## Heading 1
697
698Content.
699
700## Heading 2
701
702More content.
703"#;
704        let ctx = create_ctx(content);
705        let result = rule.check(&ctx).unwrap();
706        assert!(result.is_empty(), "Expected no warnings for matching TOC");
707    }
708
709    #[test]
710    fn test_missing_entry() {
711        let rule = create_enabled_rule();
712        let content = r#"# Title
713
714<!-- toc -->
715
716- [Heading 1](#heading-1)
717
718<!-- tocstop -->
719
720## Heading 1
721
722Content.
723
724## Heading 2
725
726New heading not in TOC.
727"#;
728        let ctx = create_ctx(content);
729        let result = rule.check(&ctx).unwrap();
730        assert_eq!(result.len(), 1);
731        assert!(result[0].message.contains("Missing entry"));
732        assert!(result[0].message.contains("Heading 2"));
733    }
734
735    #[test]
736    fn test_stale_entry() {
737        let rule = create_enabled_rule();
738        let content = r#"# Title
739
740<!-- toc -->
741
742- [Heading 1](#heading-1)
743- [Deleted Heading](#deleted-heading)
744
745<!-- tocstop -->
746
747## Heading 1
748
749Content.
750"#;
751        let ctx = create_ctx(content);
752        let result = rule.check(&ctx).unwrap();
753        assert_eq!(result.len(), 1);
754        assert!(result[0].message.contains("Stale entry"));
755        assert!(result[0].message.contains("Deleted Heading"));
756    }
757
758    #[test]
759    fn test_text_mismatch() {
760        let rule = create_enabled_rule();
761        let content = r#"# Title
762
763<!-- toc -->
764
765- [Old Name](#heading-1)
766
767<!-- tocstop -->
768
769## Heading 1
770
771Content.
772"#;
773        let ctx = create_ctx(content);
774        let result = rule.check(&ctx).unwrap();
775        assert_eq!(result.len(), 1);
776        assert!(result[0].message.contains("Text mismatch"));
777    }
778
779    // ========== Level Filtering Tests ==========
780
781    #[test]
782    fn test_min_level_excludes_h1() {
783        let mut rule = MD073TocValidation::new();
784        rule.min_level = 2;
785
786        let content = r#"<!-- toc -->
787
788<!-- tocstop -->
789
790# Should Be Excluded
791
792## Should Be Included
793
794Content.
795"#;
796        let ctx = create_ctx(content);
797        let region = rule.detect_toc_region(&ctx).unwrap();
798        let expected = rule.build_expected_toc(&ctx, &region);
799
800        assert_eq!(expected.len(), 1);
801        assert_eq!(expected[0].text, "Should Be Included");
802    }
803
804    #[test]
805    fn test_max_level_excludes_h5_h6() {
806        let mut rule = MD073TocValidation::new();
807        rule.max_level = 4;
808
809        let content = r#"<!-- toc -->
810
811<!-- tocstop -->
812
813## Level 2
814
815### Level 3
816
817#### Level 4
818
819##### Level 5 Should Be Excluded
820
821###### Level 6 Should Be Excluded
822"#;
823        let ctx = create_ctx(content);
824        let region = rule.detect_toc_region(&ctx).unwrap();
825        let expected = rule.build_expected_toc(&ctx, &region);
826
827        assert_eq!(expected.len(), 3);
828        assert!(expected.iter().all(|e| e.level <= 4));
829    }
830
831    // ========== Fix Tests ==========
832
833    #[test]
834    fn test_fix_adds_missing_entry() {
835        let rule = MD073TocValidation::new();
836        let content = r#"# Title
837
838<!-- toc -->
839
840- [Heading 1](#heading-1)
841
842<!-- tocstop -->
843
844## Heading 1
845
846Content.
847
848## Heading 2
849
850New heading.
851"#;
852        let ctx = create_ctx(content);
853        let fixed = rule.fix(&ctx).unwrap();
854        assert!(fixed.contains("- [Heading 2](#heading-2)"));
855    }
856
857    #[test]
858    fn test_fix_removes_stale_entry() {
859        let rule = MD073TocValidation::new();
860        let content = r#"# Title
861
862<!-- toc -->
863
864- [Heading 1](#heading-1)
865- [Deleted](#deleted)
866
867<!-- tocstop -->
868
869## Heading 1
870
871Content.
872"#;
873        let ctx = create_ctx(content);
874        let fixed = rule.fix(&ctx).unwrap();
875        assert!(fixed.contains("- [Heading 1](#heading-1)"));
876        assert!(!fixed.contains("Deleted"));
877    }
878
879    #[test]
880    fn test_fix_idempotent() {
881        let rule = MD073TocValidation::new();
882        let content = r#"# Title
883
884<!-- toc -->
885
886- [Heading 1](#heading-1)
887- [Heading 2](#heading-2)
888
889<!-- tocstop -->
890
891## Heading 1
892
893Content.
894
895## Heading 2
896
897More.
898"#;
899        let ctx = create_ctx(content);
900        let fixed1 = rule.fix(&ctx).unwrap();
901        let ctx2 = create_ctx(&fixed1);
902        let fixed2 = rule.fix(&ctx2).unwrap();
903
904        // Second fix should produce same output
905        assert_eq!(fixed1, fixed2);
906    }
907
908    #[test]
909    fn test_fix_preserves_markers() {
910        let rule = MD073TocValidation::new();
911        let content = r#"# Title
912
913<!-- toc -->
914
915Old TOC content.
916
917<!-- tocstop -->
918
919## New Heading
920
921Content.
922"#;
923        let ctx = create_ctx(content);
924        let fixed = rule.fix(&ctx).unwrap();
925
926        // Markers should still be present
927        assert!(fixed.contains("<!-- toc -->"));
928        assert!(fixed.contains("<!-- tocstop -->"));
929        // New content should be generated
930        assert!(fixed.contains("- [New Heading](#new-heading)"));
931    }
932
933    #[test]
934    fn test_fix_requires_markers() {
935        let rule = create_enabled_rule();
936
937        // Document without markers - no TOC detected, no changes
938        let content_no_markers = r#"# Title
939
940## Heading 1
941
942Content.
943"#;
944        let ctx = create_ctx(content_no_markers);
945        let fixed = rule.fix(&ctx).unwrap();
946        assert_eq!(fixed, content_no_markers);
947
948        // Document with markers - TOC detected and fixed
949        let content_markers = r#"# Title
950
951<!-- toc -->
952
953- [Old Entry](#old-entry)
954
955<!-- tocstop -->
956
957## Heading 1
958
959Content.
960"#;
961        let ctx = create_ctx(content_markers);
962        let fixed = rule.fix(&ctx).unwrap();
963        assert!(fixed.contains("- [Heading 1](#heading-1)"));
964        assert!(!fixed.contains("Old Entry"));
965    }
966
967    // ========== Anchor Tests ==========
968
969    #[test]
970    fn test_duplicate_heading_anchors() {
971        let rule = MD073TocValidation::new();
972        let content = r#"# Title
973
974<!-- toc -->
975
976<!-- tocstop -->
977
978## Duplicate
979
980Content.
981
982## Duplicate
983
984More content.
985
986## Duplicate
987
988Even more.
989"#;
990        let ctx = create_ctx(content);
991        let region = rule.detect_toc_region(&ctx).unwrap();
992        let expected = rule.build_expected_toc(&ctx, &region);
993
994        assert_eq!(expected.len(), 3);
995        assert_eq!(expected[0].anchor, "duplicate");
996        assert_eq!(expected[1].anchor, "duplicate-1");
997        assert_eq!(expected[2].anchor, "duplicate-2");
998    }
999
1000    // ========== Edge Cases ==========
1001
1002    #[test]
1003    fn test_headings_in_code_blocks_ignored() {
1004        let rule = create_enabled_rule();
1005        let content = r#"# Title
1006
1007<!-- toc -->
1008
1009- [Real Heading](#real-heading)
1010
1011<!-- tocstop -->
1012
1013## Real Heading
1014
1015```markdown
1016## Fake Heading In Code
1017```
1018
1019Content.
1020"#;
1021        let ctx = create_ctx(content);
1022        let result = rule.check(&ctx).unwrap();
1023        assert!(result.is_empty(), "Should not report fake heading in code block");
1024    }
1025
1026    #[test]
1027    fn test_empty_toc_region() {
1028        let rule = create_enabled_rule();
1029        let content = r#"# Title
1030
1031<!-- toc -->
1032<!-- tocstop -->
1033
1034## Heading 1
1035
1036Content.
1037"#;
1038        let ctx = create_ctx(content);
1039        let result = rule.check(&ctx).unwrap();
1040        assert_eq!(result.len(), 1);
1041        assert!(result[0].message.contains("Missing entry"));
1042    }
1043
1044    #[test]
1045    fn test_nested_indentation() {
1046        let rule = create_enabled_rule();
1047
1048        let content = r#"<!-- toc -->
1049
1050<!-- tocstop -->
1051
1052## Level 2
1053
1054### Level 3
1055
1056#### Level 4
1057
1058## Another Level 2
1059"#;
1060        let ctx = create_ctx(content);
1061        let region = rule.detect_toc_region(&ctx).unwrap();
1062        let expected = rule.build_expected_toc(&ctx, &region);
1063        let toc = rule.generate_toc(&expected);
1064
1065        // Check indentation (always nested)
1066        assert!(toc.contains("- [Level 2](#level-2)"));
1067        assert!(toc.contains("  - [Level 3](#level-3)"));
1068        assert!(toc.contains("    - [Level 4](#level-4)"));
1069        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1070    }
1071
1072    // ========== Order Mismatch Tests ==========
1073
1074    #[test]
1075    fn test_order_mismatch_detected() {
1076        let rule = create_enabled_rule();
1077        let content = r#"# Title
1078
1079<!-- toc -->
1080
1081- [Section B](#section-b)
1082- [Section A](#section-a)
1083
1084<!-- tocstop -->
1085
1086## Section A
1087
1088Content A.
1089
1090## Section B
1091
1092Content B.
1093"#;
1094        let ctx = create_ctx(content);
1095        let result = rule.check(&ctx).unwrap();
1096        // Should detect order mismatch - Section B appears before Section A in TOC
1097        // but Section A comes first in document
1098        assert!(!result.is_empty(), "Should detect order mismatch");
1099    }
1100
1101    #[test]
1102    fn test_order_mismatch_ignored_when_disabled() {
1103        let mut rule = create_enabled_rule();
1104        rule.enforce_order = false;
1105        let content = r#"# Title
1106
1107<!-- toc -->
1108
1109- [Section B](#section-b)
1110- [Section A](#section-a)
1111
1112<!-- tocstop -->
1113
1114## Section A
1115
1116Content A.
1117
1118## Section B
1119
1120Content B.
1121"#;
1122        let ctx = create_ctx(content);
1123        let result = rule.check(&ctx).unwrap();
1124        // With enforce_order=false, order mismatches should be ignored
1125        assert!(result.is_empty(), "Should not report order mismatch when disabled");
1126    }
1127
1128    // ========== Unicode and Special Characters Tests ==========
1129
1130    #[test]
1131    fn test_unicode_headings() {
1132        let rule = create_enabled_rule();
1133        let content = r#"# Title
1134
1135<!-- toc -->
1136
1137- [日本語の見出し](#日本語の見出し)
1138- [Émojis 🎉](#émojis-)
1139
1140<!-- tocstop -->
1141
1142## 日本語の見出し
1143
1144Japanese content.
1145
1146## Émojis 🎉
1147
1148Content with emojis.
1149"#;
1150        let ctx = create_ctx(content);
1151        let result = rule.check(&ctx).unwrap();
1152        // Should handle unicode correctly
1153        assert!(result.is_empty(), "Should handle unicode headings");
1154    }
1155
1156    #[test]
1157    fn test_special_characters_in_headings() {
1158        let rule = create_enabled_rule();
1159        let content = r#"# Title
1160
1161<!-- toc -->
1162
1163- [What's New?](#whats-new)
1164- [C++ Guide](#c-guide)
1165
1166<!-- tocstop -->
1167
1168## What's New?
1169
1170News content.
1171
1172## C++ Guide
1173
1174C++ content.
1175"#;
1176        let ctx = create_ctx(content);
1177        let result = rule.check(&ctx).unwrap();
1178        assert!(result.is_empty(), "Should handle special characters");
1179    }
1180
1181    #[test]
1182    fn test_code_spans_in_headings() {
1183        let rule = create_enabled_rule();
1184        let content = r#"# Title
1185
1186<!-- toc -->
1187
1188- [`check [PATHS...]`](#check-paths)
1189
1190<!-- tocstop -->
1191
1192## `check [PATHS...]`
1193
1194Command documentation.
1195"#;
1196        let ctx = create_ctx(content);
1197        let result = rule.check(&ctx).unwrap();
1198        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
1199    }
1200
1201    // ========== Config Tests ==========
1202
1203    #[test]
1204    fn test_from_config_defaults() {
1205        let config = crate::config::Config::default();
1206        let rule = MD073TocValidation::from_config(&config);
1207        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1208
1209        assert_eq!(rule.min_level, 2);
1210        assert_eq!(rule.max_level, 4);
1211        assert!(rule.enforce_order);
1212    }
1213
1214    // ========== Markdown Stripping Tests ==========
1215
1216    #[test]
1217    fn test_strip_markdown_formatting_link() {
1218        let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1219        assert_eq!(result, "Tool: terminal");
1220    }
1221
1222    #[test]
1223    fn test_strip_markdown_formatting_bold() {
1224        let result = strip_markdown_formatting("This is **bold** text");
1225        assert_eq!(result, "This is bold text");
1226
1227        let result = strip_markdown_formatting("This is __bold__ text");
1228        assert_eq!(result, "This is bold text");
1229    }
1230
1231    #[test]
1232    fn test_strip_markdown_formatting_italic() {
1233        let result = strip_markdown_formatting("This is *italic* text");
1234        assert_eq!(result, "This is italic text");
1235
1236        let result = strip_markdown_formatting("This is _italic_ text");
1237        assert_eq!(result, "This is italic text");
1238    }
1239
1240    #[test]
1241    fn test_strip_markdown_formatting_code_span() {
1242        let result = strip_markdown_formatting("Use the `format` function");
1243        assert_eq!(result, "Use the format function");
1244    }
1245
1246    #[test]
1247    fn test_strip_markdown_formatting_image() {
1248        let result = strip_markdown_formatting("See ![logo](image.png) for details");
1249        assert_eq!(result, "See logo for details");
1250    }
1251
1252    #[test]
1253    fn test_strip_markdown_formatting_reference_link() {
1254        let result = strip_markdown_formatting("See [documentation][docs] for details");
1255        assert_eq!(result, "See documentation for details");
1256    }
1257
1258    #[test]
1259    fn test_strip_markdown_formatting_combined() {
1260        // Link is stripped first, leaving bold, then bold is stripped
1261        let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1262        assert_eq!(result, "Tool: terminal");
1263    }
1264
1265    #[test]
1266    fn test_toc_with_link_in_heading_matches_stripped_text() {
1267        let rule = create_enabled_rule();
1268
1269        // TOC entry text matches the stripped heading text
1270        let content = r#"# Title
1271
1272<!-- toc -->
1273
1274- [Tool: terminal](#tool-terminal)
1275
1276<!-- tocstop -->
1277
1278## Tool: [terminal](https://example.com)
1279
1280Content here.
1281"#;
1282        let ctx = create_ctx(content);
1283        let result = rule.check(&ctx).unwrap();
1284        assert!(
1285            result.is_empty(),
1286            "Stripped heading text should match TOC entry: {result:?}"
1287        );
1288    }
1289
1290    #[test]
1291    fn test_toc_with_simplified_text_still_mismatches() {
1292        let rule = create_enabled_rule();
1293
1294        // TOC entry "terminal" does NOT match stripped heading "Tool: terminal"
1295        let content = r#"# Title
1296
1297<!-- toc -->
1298
1299- [terminal](#tool-terminal)
1300
1301<!-- tocstop -->
1302
1303## Tool: [terminal](https://example.com)
1304
1305Content here.
1306"#;
1307        let ctx = create_ctx(content);
1308        let result = rule.check(&ctx).unwrap();
1309        assert_eq!(result.len(), 1, "Should report text mismatch");
1310        assert!(result[0].message.contains("Text mismatch"));
1311    }
1312
1313    #[test]
1314    fn test_fix_generates_stripped_toc_entries() {
1315        let rule = MD073TocValidation::new();
1316        let content = r#"# Title
1317
1318<!-- toc -->
1319
1320<!-- tocstop -->
1321
1322## Tool: [busybox](https://www.busybox.net/)
1323
1324Content.
1325
1326## Tool: [mount](https://en.wikipedia.org/wiki/Mount)
1327
1328More content.
1329"#;
1330        let ctx = create_ctx(content);
1331        let fixed = rule.fix(&ctx).unwrap();
1332
1333        // Generated TOC should have stripped text (links removed)
1334        assert!(
1335            fixed.contains("- [Tool: busybox](#tool-busybox)"),
1336            "TOC entry should have stripped link text"
1337        );
1338        assert!(
1339            fixed.contains("- [Tool: mount](#tool-mount)"),
1340            "TOC entry should have stripped link text"
1341        );
1342        // TOC entries should NOT contain the URL (the actual headings in the document still will)
1343        // Check only within the TOC region (between toc markers)
1344        let toc_start = fixed.find("<!-- toc -->").unwrap();
1345        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
1346        let toc_content = &fixed[toc_start..toc_end];
1347        assert!(
1348            !toc_content.contains("busybox.net"),
1349            "TOC should not contain URLs: {toc_content}"
1350        );
1351        assert!(
1352            !toc_content.contains("wikipedia.org"),
1353            "TOC should not contain URLs: {toc_content}"
1354        );
1355    }
1356
1357    #[test]
1358    fn test_fix_with_bold_in_heading() {
1359        let rule = MD073TocValidation::new();
1360        let content = r#"# Title
1361
1362<!-- toc -->
1363
1364<!-- tocstop -->
1365
1366## **Important** Section
1367
1368Content.
1369"#;
1370        let ctx = create_ctx(content);
1371        let fixed = rule.fix(&ctx).unwrap();
1372
1373        // Generated TOC should have stripped text (bold markers removed)
1374        assert!(fixed.contains("- [Important Section](#important-section)"));
1375    }
1376
1377    #[test]
1378    fn test_fix_with_code_in_heading() {
1379        let rule = MD073TocValidation::new();
1380        let content = r#"# Title
1381
1382<!-- toc -->
1383
1384<!-- tocstop -->
1385
1386## Using `async` Functions
1387
1388Content.
1389"#;
1390        let ctx = create_ctx(content);
1391        let fixed = rule.fix(&ctx).unwrap();
1392
1393        // Generated TOC should have stripped text (backticks removed)
1394        assert!(fixed.contains("- [Using async Functions](#using-async-functions)"));
1395    }
1396
1397    // ========== Custom Anchor Tests ==========
1398
1399    #[test]
1400    fn test_custom_anchor_id_respected() {
1401        let rule = create_enabled_rule();
1402        let content = r#"# Title
1403
1404<!-- toc -->
1405
1406- [My Section](#my-custom-anchor)
1407
1408<!-- tocstop -->
1409
1410## My Section {#my-custom-anchor}
1411
1412Content here.
1413"#;
1414        let ctx = create_ctx(content);
1415        let result = rule.check(&ctx).unwrap();
1416        assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
1417    }
1418
1419    #[test]
1420    fn test_custom_anchor_id_in_generated_toc() {
1421        let rule = create_enabled_rule();
1422        let content = r#"# Title
1423
1424<!-- toc -->
1425
1426<!-- tocstop -->
1427
1428## First Section {#custom-first}
1429
1430Content.
1431
1432## Second Section {#another-custom}
1433
1434More content.
1435"#;
1436        let ctx = create_ctx(content);
1437        let fixed = rule.fix(&ctx).unwrap();
1438        assert!(fixed.contains("- [First Section](#custom-first)"));
1439        assert!(fixed.contains("- [Second Section](#another-custom)"));
1440    }
1441
1442    #[test]
1443    fn test_mixed_custom_and_generated_anchors() {
1444        let rule = create_enabled_rule();
1445        let content = r#"# Title
1446
1447<!-- toc -->
1448
1449- [Custom Section](#my-id)
1450- [Normal Section](#normal-section)
1451
1452<!-- tocstop -->
1453
1454## Custom Section {#my-id}
1455
1456Content.
1457
1458## Normal Section
1459
1460More content.
1461"#;
1462        let ctx = create_ctx(content);
1463        let result = rule.check(&ctx).unwrap();
1464        assert!(result.is_empty(), "Should handle mixed custom and generated anchors");
1465    }
1466
1467    // ========== Anchor Generation Tests ==========
1468
1469    #[test]
1470    fn test_github_anchor_style() {
1471        let rule = create_enabled_rule();
1472
1473        let content = r#"<!-- toc -->
1474
1475<!-- tocstop -->
1476
1477## Test_With_Underscores
1478
1479Content.
1480"#;
1481        let ctx = create_ctx(content);
1482        let region = rule.detect_toc_region(&ctx).unwrap();
1483        let expected = rule.build_expected_toc(&ctx, &region);
1484
1485        // GitHub-style anchors preserve underscores
1486        assert_eq!(expected[0].anchor, "test_with_underscores");
1487    }
1488
1489    // ========== Stress Tests ==========
1490
1491    #[test]
1492    fn test_stress_many_headings() {
1493        let rule = create_enabled_rule();
1494
1495        // Generate a document with 150 headings
1496        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1497
1498        for i in 1..=150 {
1499            content.push_str(&format!("## Heading Number {i}\n\nContent for section {i}.\n\n"));
1500        }
1501
1502        let ctx = create_ctx(&content);
1503
1504        // Should not panic or timeout
1505        let result = rule.check(&ctx).unwrap();
1506
1507        // Should report missing entries for all 150 headings
1508        assert_eq!(result.len(), 1, "Should report single warning for TOC");
1509        assert!(result[0].message.contains("Missing entry"));
1510
1511        // Fix should generate TOC with 150 entries
1512        let fixed = rule.fix(&ctx).unwrap();
1513        assert!(fixed.contains("- [Heading Number 1](#heading-number-1)"));
1514        assert!(fixed.contains("- [Heading Number 100](#heading-number-100)"));
1515        assert!(fixed.contains("- [Heading Number 150](#heading-number-150)"));
1516    }
1517
1518    #[test]
1519    fn test_stress_deeply_nested() {
1520        let rule = create_enabled_rule();
1521        let content = r#"# Title
1522
1523<!-- toc -->
1524
1525<!-- tocstop -->
1526
1527## Level 2 A
1528
1529### Level 3 A
1530
1531#### Level 4 A
1532
1533## Level 2 B
1534
1535### Level 3 B
1536
1537#### Level 4 B
1538
1539## Level 2 C
1540
1541### Level 3 C
1542
1543#### Level 4 C
1544
1545## Level 2 D
1546
1547### Level 3 D
1548
1549#### Level 4 D
1550"#;
1551        let ctx = create_ctx(content);
1552        let fixed = rule.fix(&ctx).unwrap();
1553
1554        // Check nested indentation is correct
1555        assert!(fixed.contains("- [Level 2 A](#level-2-a)"));
1556        assert!(fixed.contains("  - [Level 3 A](#level-3-a)"));
1557        assert!(fixed.contains("    - [Level 4 A](#level-4-a)"));
1558        assert!(fixed.contains("- [Level 2 D](#level-2-d)"));
1559        assert!(fixed.contains("  - [Level 3 D](#level-3-d)"));
1560        assert!(fixed.contains("    - [Level 4 D](#level-4-d)"));
1561    }
1562
1563    #[test]
1564    fn test_stress_many_duplicates() {
1565        let rule = create_enabled_rule();
1566
1567        // Generate 50 headings with the same text
1568        let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
1569        for _ in 0..50 {
1570            content.push_str("## FAQ\n\nContent.\n\n");
1571        }
1572
1573        let ctx = create_ctx(&content);
1574        let region = rule.detect_toc_region(&ctx).unwrap();
1575        let expected = rule.build_expected_toc(&ctx, &region);
1576
1577        // Should generate unique anchors for all 50
1578        assert_eq!(expected.len(), 50);
1579        assert_eq!(expected[0].anchor, "faq");
1580        assert_eq!(expected[1].anchor, "faq-1");
1581        assert_eq!(expected[49].anchor, "faq-49");
1582    }
1583}