Skip to main content

rumdl_lib/rules/
md073_toc_validation.rs

1//! MD073: Table of Contents validation rule
2//!
3//! Validates that TOC sections match the actual document headings.
4
5use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
/// Regex for the TOC start marker: `<!-- toc -->`.
///
/// Matching is case-insensitive (`(?i)`) and allows any amount of whitespace
/// (including none) on either side of the word `toc`. The pattern is unanchored,
/// so the marker may appear anywhere within the searched text.
static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());
14
/// Regex for the TOC stop marker: `<!-- tocstop -->` or `<!-- /toc -->`.
///
/// Matching is case-insensitive (`(?i)`) and allows any amount of whitespace
/// (including none) inside the comment delimiters. The pattern is unanchored.
static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());
17
/// Regex for extracting TOC entries: `- [text](#anchor)` or `* [text](#anchor)`
/// with optional leading whitespace for nested items.
///
/// Handles one level of nested brackets like `[`check [PATHS...]`](#check-paths)`.
///
/// Capture groups:
/// 1. the leading whitespace before the list marker (used for indentation checks),
/// 2. the link text between the outer `[` and `]`,
/// 3. the anchor, without the leading `#`.
///
/// The pattern is anchored at the start of the text it is matched against, so it
/// is intended to be applied to one line at a time.
static TOC_ENTRY_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// Represents a detected TOC region in the document: the lines that sit between
/// the start marker and the stop marker.
///
/// When the stop marker immediately follows the start marker (no content in
/// between), `detect_by_markers` produces a region whose `end_line` equals
/// `start_line` and whose `content_end` equals `content_start`, i.e. an empty
/// byte range.
#[derive(Debug, Clone)]
struct TocRegion {
    /// 1-indexed start line of the TOC content (the line after the start marker)
    start_line: usize,
    /// 1-indexed end line of the TOC content (the line before the stop marker)
    end_line: usize,
    /// Byte offset where TOC content starts (offset of the line after the start marker)
    content_start: usize,
    /// Byte offset where TOC content ends (offset of the stop-marker line, exclusive)
    content_end: usize,
}
36
/// A parsed TOC entry from the existing TOC, as captured by `TOC_ENTRY_PATTERN`.
#[derive(Debug, Clone)]
struct TocEntry {
    /// Display text of the link (the text between the outer `[` and `]`)
    text: String,
    /// Anchor/fragment (without #)
    anchor: String,
    /// Length of the leading whitespace before the list marker (for indentation
    /// checking). This is stored as a byte length, which equals the number of
    /// characters when the indentation consists of ASCII spaces or tabs.
    indent_spaces: usize,
}
47
/// An expected TOC entry generated from a document heading.
#[derive(Debug, Clone)]
struct ExpectedTocEntry {
    /// 1-indexed line number of the heading
    heading_line: usize,
    /// Heading level (1-6)
    level: u8,
    /// Heading text (for display); links and images are stripped from it only
    /// when it is compared against or rendered into a TOC entry
    text: String,
    /// Anchor for the heading: its custom ID when it has one, otherwise a
    /// GitHub-style fragment generated from the heading text. Repeated anchors
    /// receive a numeric suffix (`-1`, `-2`, ...) from the second occurrence on.
    anchor: String,
}
60
/// Types of mismatches between the actual TOC and the TOC expected from the
/// document headings. Each variant carries a clone of the offending entry so the
/// caller can render a human-readable detail message.
#[derive(Debug)]
enum TocMismatch {
    /// Entry exists in TOC but heading doesn't exist
    StaleEntry { entry: TocEntry },
    /// Heading exists but no TOC entry for it
    MissingEntry { expected: ExpectedTocEntry },
    /// TOC entry text doesn't match heading text
    TextMismatch {
        entry: TocEntry,
        expected: ExpectedTocEntry,
    },
    /// TOC entries are in wrong order; `expected_position` is the 1-based
    /// position the entry should occupy among the expected entries
    OrderMismatch { entry: TocEntry, expected_position: usize },
    /// TOC entry has wrong indentation level (both indents are in the same unit
    /// as `TocEntry::indent_spaces`)
    IndentationMismatch {
        entry: TocEntry,
        actual_indent: usize,
        expected_indent: usize,
    },
}
82
/// Regex patterns used by `strip_links_and_images`. In each pattern, capture
/// group 1 is the text that is kept when the surrounding syntax is stripped.
///
/// Inline link `[text](url)`; group 1 is `text` (must be non-empty).
static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
/// Reference link `[text][ref]` or `[text][]`; group 1 is `text` (must be non-empty).
static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
/// Inline image `![alt](src)`; group 1 is `alt` (may be empty).
static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
87
/// Extract code-span byte ranges from `text` using the CommonMark rule:
/// a run of N backticks opens a span closed by a run of exactly N backticks.
///
/// Returns a sorted, non-overlapping list of `(start, end)` byte offsets
/// (`end` exclusive) covering each code span, including the backtick
/// delimiters themselves. An opening run with no matching closing run is
/// skipped and produces no range.
///
/// The scan works directly on bytes: the backtick is ASCII and can never occur
/// inside a multi-byte UTF-8 sequence, so every returned offset is a valid
/// `char` boundary and the whole scan needs no char-to-byte index conversion.
fn code_span_ranges(text: &str) -> Vec<(usize, usize)> {
    const TICK: u8 = b'`';

    let bytes = text.as_bytes();
    let len = bytes.len();

    // Index just past the run of backticks that starts at `from`.
    let run_end = |from: usize| -> usize { from + bytes[from..].iter().take_while(|&&b| b == TICK).count() };

    let mut ranges = Vec::new();
    let mut i = 0;

    while i < len {
        if bytes[i] != TICK {
            i += 1;
            continue;
        }

        // Opening run of `n` backticks.
        let span_start = i;
        i = run_end(i);
        let n = i - span_start;

        // Look for a closing run of exactly `n` backticks. Runs of any other
        // length are part of the span's content and are stepped over whole.
        let mut j = i;
        while j < len {
            if bytes[j] != TICK {
                j += 1;
                continue;
            }
            let close_start = j;
            j = run_end(j);
            if j - close_start == n {
                ranges.push((span_start, j));
                // Resume scanning after the closing delimiter.
                i = j;
                break;
            }
        }
        // With no matching close, `i` already sits just past the opening run,
        // so scanning simply resumes from there.
    }

    ranges
}
139
140/// Strip only links and images from `text`, preserving all other inline
141/// formatting (code spans, bold, italic, etc.).
142///
143/// Links and images cannot appear inside a Markdown link label `[...]`, so they
144/// must be removed when building TOC display text. Code spans and emphasis are
145/// valid inside link labels and should be kept so the TOC entry faithfully
146/// reflects the heading's visual appearance.
147///
148/// Code-span contents are protected: link-like syntax such as `[foo](bar)` that
149/// appears inside backticks is left untouched.
150///
151/// Examples:
152/// - `` `my header` `` → `` `my header` `` (code ticks preserved)
153/// - `[terminal](url)` → `terminal` (link stripped)
154/// - `![alt](img.png)` → `alt` (image stripped)
155/// - `**bold**` → `**bold**` (emphasis preserved)
156/// - `` `[foo](bar)` `` → `` `[foo](bar)` `` (link inside code span preserved)
157/// - `Tool: [terminal](url)` → `Tool: terminal`
158fn strip_links_and_images(text: &str) -> String {
159    // Collect code-span byte ranges so we can protect their contents from
160    // the link/image regex substitutions.
161    let protected = code_span_ranges(text);
162
163    // If there are no code spans the fast path avoids all the extra work.
164    if protected.is_empty() {
165        let mut result = text.to_string();
166        result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
167        result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
168        result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
169        return result;
170    }
171
172    // Replace each code span with a unique placeholder that cannot be matched
173    // by the link/image regexes, apply the regexes, then restore the originals.
174    let mut placeholders: Vec<(&str, String)> = Vec::with_capacity(protected.len());
175    let mut masked = text.to_string();
176    // Process spans in reverse order so byte offsets remain valid after each replacement.
177    for (i, &(start, end)) in protected.iter().enumerate().rev() {
178        // Placeholder: a string containing no `[`, `]`, `(`, `)`, `!` characters.
179        let placeholder = format!("\x00CODESPAN{i}\x00");
180        let original = &text[start..end];
181        placeholders.push((original, placeholder.clone()));
182        masked.replace_range(start..end, &placeholder);
183    }
184
185    // Apply link/image stripping to the masked string
186    masked = MARKDOWN_IMAGE.replace_all(&masked, "$1").to_string();
187    masked = MARKDOWN_LINK.replace_all(&masked, "$1").to_string();
188    masked = MARKDOWN_REF_LINK.replace_all(&masked, "$1").to_string();
189
190    // Restore the original code-span text
191    for (original, placeholder) in &placeholders {
192        masked = masked.replace(placeholder.as_str(), original);
193    }
194
195    masked
196}
197
/// MD073: Table of Contents Validation
///
/// This rule validates that TOC sections match the actual document headings.
/// It detects TOC regions via markers (`<!-- toc -->...<!-- tocstop -->`).
/// Only headings that appear after the TOC region and whose level lies within
/// `min-level..=max-level` are expected to be listed.
///
/// To opt into TOC validation, add markers to your document:
/// ```markdown
/// <!-- toc -->
/// - [Section](#section)
/// <!-- tocstop -->
/// ```
///
/// ## Configuration
///
/// ```toml
/// [MD073]
/// # Enable the rule (opt-in, disabled by default)
/// enabled = true
/// # Minimum heading level to include (default: 2)
/// min-level = 2
/// # Maximum heading level to include (default: 4)
/// max-level = 4
/// # Whether TOC order must match document order (default: true)
/// enforce-order = true
/// # Indent size per nesting level (default: from MD007 config, or 2)
/// indent = 2
/// ```
#[derive(Clone)]
pub struct MD073TocValidation {
    /// Whether this rule is enabled (default: false - opt-in rule).
    // NOTE(review): within this file the flag is only stored and printed;
    // presumably the rule registry consults it — confirm against the caller.
    enabled: bool,
    /// Minimum heading level to include (clamped to 1..=6 when read from config)
    min_level: u8,
    /// Maximum heading level to include (clamped to 1..=6 when read from config)
    max_level: u8,
    /// Whether to enforce order matching
    enforce_order: bool,
    /// Indent size per nesting level (reads from MD007 config by default;
    /// clamped to 1..=8 when read from config)
    pub indent: usize,
}
238
/// Default settings: rule disabled, heading levels 2 through 4, order
/// enforcement on, and two spaces of indentation per nesting level.
impl Default for MD073TocValidation {
    fn default() -> Self {
        Self {
            enabled: false, // Disabled by default - opt-in rule
            min_level: 2,
            max_level: 4,
            enforce_order: true,
            indent: 2, // Default indent, can be overridden by MD007 config
        }
    }
}
250
251impl std::fmt::Debug for MD073TocValidation {
252    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
253        f.debug_struct("MD073TocValidation")
254            .field("enabled", &self.enabled)
255            .field("min_level", &self.min_level)
256            .field("max_level", &self.max_level)
257            .field("enforce_order", &self.enforce_order)
258            .field("indent", &self.indent)
259            .finish()
260    }
261}
262
263impl MD073TocValidation {
    /// Create a new rule with default settings (identical to `Default::default()`).
    pub fn new() -> Self {
        Self::default()
    }
268
269    /// Detect TOC region using markers
270    fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
271        let mut start_line = None;
272        let mut start_byte = None;
273
274        for (idx, line_info) in ctx.lines.iter().enumerate() {
275            let line_num = idx + 1;
276            let content = line_info.content(ctx.content);
277
278            // Skip if in code block or front matter
279            if line_info.in_code_block || line_info.in_front_matter {
280                continue;
281            }
282
283            // Look for start marker or stop marker
284            if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
285                // We have a start, now look for stop marker
286                if TOC_STOP_MARKER.is_match(content) {
287                    let end_line = line_num - 1;
288                    let content_end = line_info.byte_offset;
289
290                    // Handle case where there's no content between markers
291                    if end_line < s_line {
292                        return Some(TocRegion {
293                            start_line: s_line,
294                            end_line: s_line,
295                            content_start: s_byte,
296                            content_end: s_byte,
297                        });
298                    }
299
300                    return Some(TocRegion {
301                        start_line: s_line,
302                        end_line,
303                        content_start: s_byte,
304                        content_end,
305                    });
306                }
307            } else if TOC_START_MARKER.is_match(content) {
308                // TOC content starts on the next line
309                if idx + 1 < ctx.lines.len() {
310                    start_line = Some(line_num + 1);
311                    start_byte = Some(ctx.lines[idx + 1].byte_offset);
312                }
313            }
314        }
315
316        None
317    }
318
    /// Detect the TOC region of the document.
    ///
    /// Marker-based detection is the only strategy: this simply forwards to
    /// `detect_by_markers` and returns `None` when no marker pair is found.
    fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
        self.detect_by_markers(ctx)
    }
323
324    /// Extract TOC entries from the detected region
325    fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
326        let mut entries = Vec::new();
327
328        for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
329            let line_info = &ctx.lines[idx];
330            let content = line_info.content(ctx.content);
331
332            if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
333                let indent_spaces = caps.get(1).map_or(0, |m| m.as_str().len());
334                let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
335                let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
336
337                entries.push(TocEntry {
338                    text,
339                    anchor,
340                    indent_spaces,
341                });
342            }
343        }
344
345        entries
346    }
347
348    /// Build expected TOC entries from document headings
349    fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
350        let mut entries = Vec::new();
351        let mut fragment_counts: HashMap<String, usize> = HashMap::new();
352
353        for (idx, line_info) in ctx.lines.iter().enumerate() {
354            let line_num = idx + 1;
355
356            // Skip headings before/within the TOC region
357            if line_num <= toc_region.end_line {
358                // Also skip the TOC heading itself for heading-based detection
359                continue;
360            }
361
362            // Skip code blocks, front matter, HTML blocks
363            if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
364                continue;
365            }
366
367            if let Some(heading) = &line_info.heading {
368                // Filter by min/max level
369                if heading.level < self.min_level || heading.level > self.max_level {
370                    continue;
371                }
372
373                // Use custom ID if available, otherwise generate GitHub-style anchor
374                let base_anchor = if let Some(custom_id) = &heading.custom_id {
375                    custom_id.clone()
376                } else {
377                    AnchorStyle::GitHub.generate_fragment(&heading.text)
378                };
379
380                // Handle duplicate anchors
381                let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
382                    let suffix = *count;
383                    *count += 1;
384                    format!("{base_anchor}-{suffix}")
385                } else {
386                    fragment_counts.insert(base_anchor.clone(), 1);
387                    base_anchor
388                };
389
390                entries.push(ExpectedTocEntry {
391                    heading_line: line_num,
392                    level: heading.level,
393                    text: heading.text.clone(),
394                    anchor,
395                });
396            }
397        }
398
399        entries
400    }
401
402    /// Compare actual TOC entries against expected and find mismatches
403    fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
404        let mut mismatches = Vec::new();
405
406        // Build a map of expected anchors
407        let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
408            expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
409
410        // Count actual anchors (handles duplicate anchors in TOC)
411        let mut actual_anchor_counts: HashMap<&str, usize> = HashMap::new();
412        for entry in actual {
413            *actual_anchor_counts.entry(entry.anchor.as_str()).or_insert(0) += 1;
414        }
415
416        // Count expected anchors
417        let mut expected_anchor_counts: HashMap<&str, usize> = HashMap::new();
418        for exp in expected {
419            *expected_anchor_counts.entry(exp.anchor.as_str()).or_insert(0) += 1;
420        }
421
422        // Check for stale entries (in TOC but not in expected, accounting for counts)
423        let mut stale_anchor_counts: HashMap<&str, usize> = HashMap::new();
424        for entry in actual {
425            let actual_count = actual_anchor_counts.get(entry.anchor.as_str()).copied().unwrap_or(0);
426            let expected_count = expected_anchor_counts.get(entry.anchor.as_str()).copied().unwrap_or(0);
427            if actual_count > expected_count {
428                let reported = stale_anchor_counts.entry(entry.anchor.as_str()).or_insert(0);
429                if *reported < actual_count - expected_count {
430                    *reported += 1;
431                    mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
432                }
433            }
434        }
435
436        // Check for missing entries (in expected but not in TOC, accounting for counts)
437        let mut missing_anchor_counts: HashMap<&str, usize> = HashMap::new();
438        for exp in expected {
439            let actual_count = actual_anchor_counts.get(exp.anchor.as_str()).copied().unwrap_or(0);
440            let expected_count = expected_anchor_counts.get(exp.anchor.as_str()).copied().unwrap_or(0);
441            if expected_count > actual_count {
442                let reported = missing_anchor_counts.entry(exp.anchor.as_str()).or_insert(0);
443                if *reported < expected_count - actual_count {
444                    *reported += 1;
445                    mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
446                }
447            }
448        }
449
450        // Check for text mismatches. Compare with the same normalization used in
451        // generate_toc: strip only links and images, preserve code spans and emphasis.
452        // This ensures a correct user-written TOC entry like `` [`my header`](#anchor) ``
453        // is not flagged against a heading `` `my header` ``.
454        for entry in actual {
455            if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
456                let actual_normalized = strip_links_and_images(entry.text.trim());
457                let expected_normalized = strip_links_and_images(exp.text.trim());
458                if actual_normalized != expected_normalized {
459                    mismatches.push(TocMismatch::TextMismatch {
460                        entry: entry.clone(),
461                        expected: (*exp).clone(),
462                    });
463                }
464            }
465        }
466
467        // Check for indentation mismatches
468        // Expected indentation is indent spaces per level difference from base level
469        if !expected.is_empty() {
470            let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
471
472            for entry in actual {
473                if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
474                    let level_diff = exp.level.saturating_sub(base_level) as usize;
475                    let expected_indent = level_diff * self.indent;
476
477                    if entry.indent_spaces != expected_indent {
478                        // Don't report indentation mismatch if already reported as text mismatch
479                        let already_reported = mismatches.iter().any(|m| match m {
480                            TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
481                            TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
482                            _ => false,
483                        });
484                        if !already_reported {
485                            mismatches.push(TocMismatch::IndentationMismatch {
486                                entry: entry.clone(),
487                                actual_indent: entry.indent_spaces,
488                                expected_indent,
489                            });
490                        }
491                    }
492                }
493            }
494        }
495
496        // Check order if enforce_order is enabled
497        if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
498            let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
499
500            // Find entries that exist in both but are out of order
501            let mut expected_idx = 0;
502            for entry in actual {
503                // Skip entries that don't exist in expected
504                if !expected_anchors.contains_key(entry.anchor.as_str()) {
505                    continue;
506                }
507
508                // Find where this anchor should be
509                while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
510                    expected_idx += 1;
511                }
512
513                if expected_idx >= expected_order.len() {
514                    // This entry is after where it should be
515                    let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
516                    // Only add order mismatch if not already reported as stale/text mismatch
517                    let already_reported = mismatches.iter().any(|m| match m {
518                        TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
519                        TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
520                        _ => false,
521                    });
522                    if !already_reported {
523                        mismatches.push(TocMismatch::OrderMismatch {
524                            entry: entry.clone(),
525                            expected_position: correct_pos + 1,
526                        });
527                    }
528                } else {
529                    expected_idx += 1;
530                }
531            }
532        }
533
534        mismatches
535    }
536
537    /// Generate a new TOC from expected entries (always uses nested indentation)
538    fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
539        if expected.is_empty() {
540            return String::new();
541        }
542
543        let mut result = String::new();
544        let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
545        let indent_str = " ".repeat(self.indent);
546
547        for entry in expected {
548            let level_diff = entry.level.saturating_sub(base_level) as usize;
549            let indent = indent_str.repeat(level_diff);
550
551            // Build display text: strip only links and images (which would create invalid
552            // nested-link syntax inside `[...]`), but preserve code spans and emphasis so
553            // the TOC entry reflects the heading's visual appearance.
554            let display_text = strip_links_and_images(&entry.text);
555            result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
556        }
557
558        result
559    }
560}
561
562impl Rule for MD073TocValidation {
    /// Rule identifier; also the key under which this rule's configuration is
    /// looked up in `from_config`.
    fn name(&self) -> &'static str {
        "MD073"
    }
566
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Table of Contents should match document headings"
    }
570
571    fn should_skip(&self, ctx: &LintContext) -> bool {
572        // Quick check: skip if no TOC markers. detect_toc_region() is
573        // case-insensitive, so use a case-insensitive containment check here
574        // to avoid skipping fix() on documents with uppercase markers like
575        // `<!-- TOC -->`.
576        let lower = ctx.content.to_ascii_lowercase();
577        !(lower.contains("<!-- toc") || lower.contains("<!--toc"))
578    }
579
580    fn check(&self, ctx: &LintContext) -> LintResult {
581        let mut warnings = Vec::new();
582
583        // Detect TOC region
584        let Some(region) = self.detect_toc_region(ctx) else {
585            // No TOC found - nothing to validate
586            return Ok(warnings);
587        };
588
589        // Extract actual TOC entries
590        let actual_entries = self.extract_toc_entries(ctx, &region);
591
592        // Build expected TOC from headings
593        let expected_entries = self.build_expected_toc(ctx, &region);
594
595        // If no expected entries and no actual entries, nothing to validate
596        if expected_entries.is_empty() && actual_entries.is_empty() {
597            return Ok(warnings);
598        }
599
600        // Validate
601        let mismatches = self.validate_toc(&actual_entries, &expected_entries);
602
603        if !mismatches.is_empty() {
604            // Generate a single warning at the TOC region with details
605            let mut details = Vec::new();
606
607            for mismatch in &mismatches {
608                match mismatch {
609                    TocMismatch::StaleEntry { entry } => {
610                        details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
611                    }
612                    TocMismatch::MissingEntry { expected } => {
613                        details.push(format!(
614                            "Missing entry: '{}' (line {})",
615                            expected.text, expected.heading_line
616                        ));
617                    }
618                    TocMismatch::TextMismatch { entry, expected } => {
619                        details.push(format!(
620                            "Text mismatch: TOC has '{}', heading is '{}'",
621                            entry.text, expected.text
622                        ));
623                    }
624                    TocMismatch::OrderMismatch {
625                        entry,
626                        expected_position,
627                    } => {
628                        details.push(format!(
629                            "Order mismatch: '{}' should be at position {}",
630                            entry.text, expected_position
631                        ));
632                    }
633                    TocMismatch::IndentationMismatch {
634                        entry,
635                        actual_indent,
636                        expected_indent,
637                        ..
638                    } => {
639                        details.push(format!(
640                            "Indentation mismatch: '{}' has {} spaces, expected {} spaces",
641                            entry.text, actual_indent, expected_indent
642                        ));
643                    }
644                }
645            }
646
647            let message = format!(
648                "Table of Contents does not match document headings: {}",
649                details.join("; ")
650            );
651
652            // Generate fix: replace entire TOC content
653            let new_toc = self.generate_toc(&expected_entries);
654            let fix_range = region.content_start..region.content_end;
655
656            warnings.push(LintWarning {
657                rule_name: Some(self.name().to_string()),
658                message,
659                line: region.start_line,
660                column: 1,
661                end_line: region.end_line,
662                end_column: 1,
663                severity: Severity::Warning,
664                fix: Some(Fix::new(fix_range, new_toc)),
665            });
666        }
667
668        Ok(warnings)
669    }
670
671    fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
672        if self.should_skip(ctx) {
673            return Ok(ctx.content.to_string());
674        }
675        let warnings = self.check(ctx)?;
676        if warnings.is_empty() {
677            return Ok(ctx.content.to_string());
678        }
679        let warnings =
680            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
681        crate::utils::fix_utils::apply_warning_fixes(ctx.content, &warnings).map_err(LintError::InvalidInput)
682    }
683
    /// Category this rule is grouped under; it is reported as `Other`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Other
    }
687
    /// Expose the rule as `&dyn Any` so callers holding a trait object can
    /// downcast to the concrete `MD073TocValidation` type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
691
692    fn default_config_section(&self) -> Option<(String, toml::Value)> {
693        let value: toml::Value = toml::from_str(
694            r#"
695# Whether this rule is enabled (opt-in, disabled by default)
696enabled = false
697# Minimum heading level to include
698min-level = 2
699# Maximum heading level to include
700max-level = 4
701# Whether TOC order must match document order
702enforce-order = true
703# Indentation per nesting level (defaults to MD007's indent value)
704indent = 2
705"#,
706        )
707        .ok()?;
708        Some(("MD073".to_string(), value))
709    }
710
711    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
712    where
713        Self: Sized,
714    {
715        let mut rule = MD073TocValidation::default();
716        let mut indent_from_md073 = false;
717
718        if let Some(rule_config) = config.rules.get("MD073") {
719            // Parse enabled (opt-in rule, defaults to false)
720            if let Some(enabled) = rule_config.values.get("enabled").and_then(toml::Value::as_bool) {
721                rule.enabled = enabled;
722            }
723
724            // Parse min-level
725            if let Some(min_level) = rule_config.values.get("min-level").and_then(toml::Value::as_integer) {
726                rule.min_level = (min_level.clamp(1, 6)) as u8;
727            }
728
729            // Parse max-level
730            if let Some(max_level) = rule_config.values.get("max-level").and_then(toml::Value::as_integer) {
731                rule.max_level = (max_level.clamp(1, 6)) as u8;
732            }
733
734            // Parse enforce-order
735            if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(toml::Value::as_bool) {
736                rule.enforce_order = enforce_order;
737            }
738
739            // Parse indent (MD073-specific override)
740            if let Some(indent) = rule_config.values.get("indent").and_then(toml::Value::as_integer) {
741                rule.indent = (indent.clamp(1, 8)) as usize;
742                indent_from_md073 = true;
743            }
744        }
745
746        // If indent not explicitly set in MD073, read from MD007 config
747        if !indent_from_md073
748            && let Some(md007_config) = config.rules.get("MD007")
749            && let Some(indent) = md007_config.values.get("indent").and_then(toml::Value::as_integer)
750        {
751            rule.indent = (indent.clamp(1, 8)) as usize;
752        }
753
754        Box::new(rule)
755    }
756}
757
758#[cfg(test)]
759mod tests {
760    use super::*;
761    use crate::config::MarkdownFlavor;
762    use regex::Regex;
763    use std::sync::LazyLock;
764
765    // ---- Test-only helpers for stripping all inline formatting ----
766    // These are not used in production code; they exist only to test
767    // the individual stripping primitives in isolation.
768
769    /// Strip code spans from text, handling multi-backtick spans per CommonMark spec.
770    fn strip_code_spans(text: &str) -> String {
771        let chars: Vec<char> = text.chars().collect();
772        let len = chars.len();
773        let mut result = String::with_capacity(text.len());
774        let mut i = 0;
775
776        while i < len {
777            if chars[i] == '`' {
778                let open_start = i;
779                while i < len && chars[i] == '`' {
780                    i += 1;
781                }
782                let backtick_count = i - open_start;
783
784                let content_start = i;
785                let mut found_close = false;
786                while i < len {
787                    if chars[i] == '`' {
788                        let close_start = i;
789                        while i < len && chars[i] == '`' {
790                            i += 1;
791                        }
792                        if i - close_start == backtick_count {
793                            let content: String = chars[content_start..close_start].iter().collect();
794                            let stripped = if content.starts_with(' ') && content.ends_with(' ') && content.len() > 1 {
795                                content[1..content.len() - 1].to_string()
796                            } else {
797                                content
798                            };
799                            result.push_str(&stripped);
800                            found_close = true;
801                            break;
802                        }
803                    } else {
804                        i += 1;
805                    }
806                }
807                if !found_close {
808                    for _ in 0..backtick_count {
809                        result.push('`');
810                    }
811                    let remaining: String = chars[content_start..].iter().collect();
812                    result.push_str(&remaining);
813                    break;
814                }
815            } else {
816                result.push(chars[i]);
817                i += 1;
818            }
819        }
820
821        result
822    }
823
824    static TEST_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
825    static TEST_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
826    static TEST_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
827    static TEST_ITALIC_UNDERSCORE: LazyLock<Regex> =
828        LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
829
830    /// Strip all inline markdown formatting from text, reducing it to plain text.
831    /// Builds on `strip_links_and_images` and additionally removes code spans,
832    /// bold, and italic markers. Used in tests only.
833    fn strip_markdown_formatting(text: &str) -> String {
834        let mut result = strip_links_and_images(text);
835        result = strip_code_spans(&result);
836        result = TEST_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
837        result = TEST_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
838        result = TEST_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
839        result = TEST_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
840        result
841    }
842
843    fn create_ctx(content: &str) -> LintContext<'_> {
844        LintContext::new(content, MarkdownFlavor::Standard, None)
845    }
846
847    /// Create rule with enabled=true for tests that call check() directly
848    fn create_enabled_rule() -> MD073TocValidation {
849        MD073TocValidation {
850            enabled: true,
851            ..MD073TocValidation::default()
852        }
853    }
854
855    // ========== Detection Tests ==========
856
857    #[test]
858    fn test_detect_markers_basic() {
859        let rule = MD073TocValidation::new();
860        let content = r#"# Title
861
862<!-- toc -->
863
864- [Heading 1](#heading-1)
865
866<!-- tocstop -->
867
868## Heading 1
869
870Content here.
871"#;
872        let ctx = create_ctx(content);
873        let region = rule.detect_by_markers(&ctx);
874        assert!(region.is_some());
875        let region = region.unwrap();
876        // Verify region boundaries are detected correctly
877        assert_eq!(region.start_line, 4);
878        assert_eq!(region.end_line, 6);
879    }
880
881    #[test]
882    fn test_detect_markers_variations() {
883        let rule = MD073TocValidation::new();
884
885        // Test <!--toc--> (no spaces)
886        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
887        let ctx1 = create_ctx(content1);
888        assert!(rule.detect_by_markers(&ctx1).is_some());
889
890        // Test <!-- TOC --> (uppercase)
891        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
892        let ctx2 = create_ctx(content2);
893        assert!(rule.detect_by_markers(&ctx2).is_some());
894
895        // Test <!-- /toc --> (alternative stop marker)
896        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
897        let ctx3 = create_ctx(content3);
898        assert!(rule.detect_by_markers(&ctx3).is_some());
899    }
900
901    #[test]
902    fn test_no_toc_region() {
903        let rule = MD073TocValidation::new();
904        let content = r#"# Title
905
906## Heading 1
907
908Content here.
909
910## Heading 2
911
912More content.
913"#;
914        let ctx = create_ctx(content);
915        let region = rule.detect_toc_region(&ctx);
916        assert!(region.is_none());
917    }
918
919    // ========== Validation Tests ==========
920
921    #[test]
922    fn test_toc_matches_headings() {
923        let rule = create_enabled_rule();
924        let content = r#"# Title
925
926<!-- toc -->
927
928- [Heading 1](#heading-1)
929- [Heading 2](#heading-2)
930
931<!-- tocstop -->
932
933## Heading 1
934
935Content.
936
937## Heading 2
938
939More content.
940"#;
941        let ctx = create_ctx(content);
942        let result = rule.check(&ctx).unwrap();
943        assert!(result.is_empty(), "Expected no warnings for matching TOC");
944    }
945
946    #[test]
947    fn test_missing_entry() {
948        let rule = create_enabled_rule();
949        let content = r#"# Title
950
951<!-- toc -->
952
953- [Heading 1](#heading-1)
954
955<!-- tocstop -->
956
957## Heading 1
958
959Content.
960
961## Heading 2
962
963New heading not in TOC.
964"#;
965        let ctx = create_ctx(content);
966        let result = rule.check(&ctx).unwrap();
967        assert_eq!(result.len(), 1);
968        assert!(result[0].message.contains("Missing entry"));
969        assert!(result[0].message.contains("Heading 2"));
970    }
971
972    #[test]
973    fn test_stale_entry() {
974        let rule = create_enabled_rule();
975        let content = r#"# Title
976
977<!-- toc -->
978
979- [Heading 1](#heading-1)
980- [Deleted Heading](#deleted-heading)
981
982<!-- tocstop -->
983
984## Heading 1
985
986Content.
987"#;
988        let ctx = create_ctx(content);
989        let result = rule.check(&ctx).unwrap();
990        assert_eq!(result.len(), 1);
991        assert!(result[0].message.contains("Stale entry"));
992        assert!(result[0].message.contains("Deleted Heading"));
993    }
994
995    #[test]
996    fn test_text_mismatch() {
997        let rule = create_enabled_rule();
998        let content = r#"# Title
999
1000<!-- toc -->
1001
1002- [Old Name](#heading-1)
1003
1004<!-- tocstop -->
1005
1006## Heading 1
1007
1008Content.
1009"#;
1010        let ctx = create_ctx(content);
1011        let result = rule.check(&ctx).unwrap();
1012        assert_eq!(result.len(), 1);
1013        assert!(result[0].message.contains("Text mismatch"));
1014    }
1015
1016    // ========== Level Filtering Tests ==========
1017
1018    #[test]
1019    fn test_min_level_excludes_h1() {
1020        let mut rule = MD073TocValidation::new();
1021        rule.min_level = 2;
1022
1023        let content = r#"<!-- toc -->
1024
1025<!-- tocstop -->
1026
1027# Should Be Excluded
1028
1029## Should Be Included
1030
1031Content.
1032"#;
1033        let ctx = create_ctx(content);
1034        let region = rule.detect_toc_region(&ctx).unwrap();
1035        let expected = rule.build_expected_toc(&ctx, &region);
1036
1037        assert_eq!(expected.len(), 1);
1038        assert_eq!(expected[0].text, "Should Be Included");
1039    }
1040
1041    #[test]
1042    fn test_max_level_excludes_h5_h6() {
1043        let mut rule = MD073TocValidation::new();
1044        rule.max_level = 4;
1045
1046        let content = r#"<!-- toc -->
1047
1048<!-- tocstop -->
1049
1050## Level 2
1051
1052### Level 3
1053
1054#### Level 4
1055
1056##### Level 5 Should Be Excluded
1057
1058###### Level 6 Should Be Excluded
1059"#;
1060        let ctx = create_ctx(content);
1061        let region = rule.detect_toc_region(&ctx).unwrap();
1062        let expected = rule.build_expected_toc(&ctx, &region);
1063
1064        assert_eq!(expected.len(), 3);
1065        assert!(expected.iter().all(|e| e.level <= 4));
1066    }
1067
1068    // ========== Fix Tests ==========
1069
1070    #[test]
1071    fn test_fix_adds_missing_entry() {
1072        let rule = MD073TocValidation::new();
1073        let content = r#"# Title
1074
1075<!-- toc -->
1076
1077- [Heading 1](#heading-1)
1078
1079<!-- tocstop -->
1080
1081## Heading 1
1082
1083Content.
1084
1085## Heading 2
1086
1087New heading.
1088"#;
1089        let ctx = create_ctx(content);
1090        let fixed = rule.fix(&ctx).unwrap();
1091        assert!(fixed.contains("- [Heading 2](#heading-2)"));
1092    }
1093
1094    #[test]
1095    fn test_fix_removes_stale_entry() {
1096        let rule = MD073TocValidation::new();
1097        let content = r#"# Title
1098
1099<!-- toc -->
1100
1101- [Heading 1](#heading-1)
1102- [Deleted](#deleted)
1103
1104<!-- tocstop -->
1105
1106## Heading 1
1107
1108Content.
1109"#;
1110        let ctx = create_ctx(content);
1111        let fixed = rule.fix(&ctx).unwrap();
1112        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1113        assert!(!fixed.contains("Deleted"));
1114    }
1115
1116    #[test]
1117    fn test_fix_idempotent() {
1118        let rule = MD073TocValidation::new();
1119        let content = r#"# Title
1120
1121<!-- toc -->
1122
1123- [Heading 1](#heading-1)
1124- [Heading 2](#heading-2)
1125
1126<!-- tocstop -->
1127
1128## Heading 1
1129
1130Content.
1131
1132## Heading 2
1133
1134More.
1135"#;
1136        let ctx = create_ctx(content);
1137        let fixed1 = rule.fix(&ctx).unwrap();
1138        let ctx2 = create_ctx(&fixed1);
1139        let fixed2 = rule.fix(&ctx2).unwrap();
1140
1141        // Second fix should produce same output
1142        assert_eq!(fixed1, fixed2);
1143    }
1144
1145    #[test]
1146    fn test_fix_preserves_markers() {
1147        let rule = MD073TocValidation::new();
1148        let content = r#"# Title
1149
1150<!-- toc -->
1151
1152Old TOC content.
1153
1154<!-- tocstop -->
1155
1156## New Heading
1157
1158Content.
1159"#;
1160        let ctx = create_ctx(content);
1161        let fixed = rule.fix(&ctx).unwrap();
1162
1163        // Markers should still be present
1164        assert!(fixed.contains("<!-- toc -->"));
1165        assert!(fixed.contains("<!-- tocstop -->"));
1166        // New content should be generated
1167        assert!(fixed.contains("- [New Heading](#new-heading)"));
1168    }
1169
1170    #[test]
1171    fn test_fix_requires_markers() {
1172        let rule = create_enabled_rule();
1173
1174        // Document without markers - no TOC detected, no changes
1175        let content_no_markers = r#"# Title
1176
1177## Heading 1
1178
1179Content.
1180"#;
1181        let ctx = create_ctx(content_no_markers);
1182        let fixed = rule.fix(&ctx).unwrap();
1183        assert_eq!(fixed, content_no_markers);
1184
1185        // Document with markers - TOC detected and fixed
1186        let content_markers = r#"# Title
1187
1188<!-- toc -->
1189
1190- [Old Entry](#old-entry)
1191
1192<!-- tocstop -->
1193
1194## Heading 1
1195
1196Content.
1197"#;
1198        let ctx = create_ctx(content_markers);
1199        let fixed = rule.fix(&ctx).unwrap();
1200        assert!(fixed.contains("- [Heading 1](#heading-1)"));
1201        assert!(!fixed.contains("Old Entry"));
1202    }
1203
1204    // ========== Anchor Tests ==========
1205
1206    #[test]
1207    fn test_duplicate_heading_anchors() {
1208        let rule = MD073TocValidation::new();
1209        let content = r#"# Title
1210
1211<!-- toc -->
1212
1213<!-- tocstop -->
1214
1215## Duplicate
1216
1217Content.
1218
1219## Duplicate
1220
1221More content.
1222
1223## Duplicate
1224
1225Even more.
1226"#;
1227        let ctx = create_ctx(content);
1228        let region = rule.detect_toc_region(&ctx).unwrap();
1229        let expected = rule.build_expected_toc(&ctx, &region);
1230
1231        assert_eq!(expected.len(), 3);
1232        assert_eq!(expected[0].anchor, "duplicate");
1233        assert_eq!(expected[1].anchor, "duplicate-1");
1234        assert_eq!(expected[2].anchor, "duplicate-2");
1235    }
1236
1237    // ========== Edge Cases ==========
1238
1239    #[test]
1240    fn test_headings_in_code_blocks_ignored() {
1241        let rule = create_enabled_rule();
1242        let content = r#"# Title
1243
1244<!-- toc -->
1245
1246- [Real Heading](#real-heading)
1247
1248<!-- tocstop -->
1249
1250## Real Heading
1251
1252```markdown
1253## Fake Heading In Code
1254```
1255
1256Content.
1257"#;
1258        let ctx = create_ctx(content);
1259        let result = rule.check(&ctx).unwrap();
1260        assert!(result.is_empty(), "Should not report fake heading in code block");
1261    }
1262
1263    #[test]
1264    fn test_empty_toc_region() {
1265        let rule = create_enabled_rule();
1266        let content = r#"# Title
1267
1268<!-- toc -->
1269<!-- tocstop -->
1270
1271## Heading 1
1272
1273Content.
1274"#;
1275        let ctx = create_ctx(content);
1276        let result = rule.check(&ctx).unwrap();
1277        assert_eq!(result.len(), 1);
1278        assert!(result[0].message.contains("Missing entry"));
1279    }
1280
1281    #[test]
1282    fn test_nested_indentation() {
1283        let rule = create_enabled_rule();
1284
1285        let content = r#"<!-- toc -->
1286
1287<!-- tocstop -->
1288
1289## Level 2
1290
1291### Level 3
1292
1293#### Level 4
1294
1295## Another Level 2
1296"#;
1297        let ctx = create_ctx(content);
1298        let region = rule.detect_toc_region(&ctx).unwrap();
1299        let expected = rule.build_expected_toc(&ctx, &region);
1300        let toc = rule.generate_toc(&expected);
1301
1302        // Check indentation (always nested)
1303        assert!(toc.contains("- [Level 2](#level-2)"));
1304        assert!(toc.contains("  - [Level 3](#level-3)"));
1305        assert!(toc.contains("    - [Level 4](#level-4)"));
1306        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1307    }
1308
1309    // ========== Indentation Mismatch Tests ==========
1310
1311    #[test]
1312    fn test_indentation_mismatch_detected() {
1313        let rule = create_enabled_rule();
1314        // TOC entries are all at same indentation level, but headings have different levels
1315        let content = r#"<!-- toc -->
1316- [Hello](#hello)
1317- [Another](#another)
1318- [Heading](#heading)
1319<!-- tocstop -->
1320
1321## Hello
1322
1323### Another
1324
1325## Heading
1326"#;
1327        let ctx = create_ctx(content);
1328        let result = rule.check(&ctx).unwrap();
1329        // Should detect indentation mismatch - "Another" is level 3 but has no indent
1330        assert_eq!(result.len(), 1, "Should report indentation mismatch: {result:?}");
1331        assert!(
1332            result[0].message.contains("Indentation mismatch"),
1333            "Message should mention indentation: {}",
1334            result[0].message
1335        );
1336        assert!(
1337            result[0].message.contains("Another"),
1338            "Message should mention the entry: {}",
1339            result[0].message
1340        );
1341    }
1342
1343    #[test]
1344    fn test_indentation_mismatch_fixed() {
1345        let rule = create_enabled_rule();
1346        // TOC entries are all at same indentation level, but headings have different levels
1347        let content = r#"<!-- toc -->
1348- [Hello](#hello)
1349- [Another](#another)
1350- [Heading](#heading)
1351<!-- tocstop -->
1352
1353## Hello
1354
1355### Another
1356
1357## Heading
1358"#;
1359        let ctx = create_ctx(content);
1360        let fixed = rule.fix(&ctx).unwrap();
1361        // After fix, "Another" should be indented
1362        assert!(fixed.contains("- [Hello](#hello)"));
1363        assert!(fixed.contains("  - [Another](#another)")); // Indented with 2 spaces
1364        assert!(fixed.contains("- [Heading](#heading)"));
1365    }
1366
1367    #[test]
1368    fn test_no_indentation_mismatch_when_correct() {
1369        let rule = create_enabled_rule();
1370        // TOC has correct indentation
1371        let content = r#"<!-- toc -->
1372- [Hello](#hello)
1373  - [Another](#another)
1374- [Heading](#heading)
1375<!-- tocstop -->
1376
1377## Hello
1378
1379### Another
1380
1381## Heading
1382"#;
1383        let ctx = create_ctx(content);
1384        let result = rule.check(&ctx).unwrap();
1385        // Should not report any issues - indentation is correct
1386        assert!(result.is_empty(), "Should not report issues: {result:?}");
1387    }
1388
1389    // ========== Order Mismatch Tests ==========
1390
1391    #[test]
1392    fn test_order_mismatch_detected() {
1393        let rule = create_enabled_rule();
1394        let content = r#"# Title
1395
1396<!-- toc -->
1397
1398- [Section B](#section-b)
1399- [Section A](#section-a)
1400
1401<!-- tocstop -->
1402
1403## Section A
1404
1405Content A.
1406
1407## Section B
1408
1409Content B.
1410"#;
1411        let ctx = create_ctx(content);
1412        let result = rule.check(&ctx).unwrap();
1413        // Should detect order mismatch - Section B appears before Section A in TOC
1414        // but Section A comes first in document
1415        assert!(!result.is_empty(), "Should detect order mismatch");
1416    }
1417
1418    #[test]
1419    fn test_order_mismatch_ignored_when_disabled() {
1420        let mut rule = create_enabled_rule();
1421        rule.enforce_order = false;
1422        let content = r#"# Title
1423
1424<!-- toc -->
1425
1426- [Section B](#section-b)
1427- [Section A](#section-a)
1428
1429<!-- tocstop -->
1430
1431## Section A
1432
1433Content A.
1434
1435## Section B
1436
1437Content B.
1438"#;
1439        let ctx = create_ctx(content);
1440        let result = rule.check(&ctx).unwrap();
1441        // With enforce_order=false, order mismatches should be ignored
1442        assert!(result.is_empty(), "Should not report order mismatch when disabled");
1443    }
1444
1445    // ========== Unicode and Special Characters Tests ==========
1446
1447    #[test]
1448    fn test_unicode_headings() {
1449        let rule = create_enabled_rule();
1450        let content = r#"# Title
1451
1452<!-- toc -->
1453
1454- [日本語の見出し](#日本語の見出し)
1455- [Émojis 🎉](#émojis-)
1456
1457<!-- tocstop -->
1458
1459## 日本語の見出し
1460
1461Japanese content.
1462
1463## Émojis 🎉
1464
1465Content with emojis.
1466"#;
1467        let ctx = create_ctx(content);
1468        let result = rule.check(&ctx).unwrap();
1469        // Should handle unicode correctly
1470        assert!(result.is_empty(), "Should handle unicode headings");
1471    }
1472
1473    #[test]
1474    fn test_special_characters_in_headings() {
1475        let rule = create_enabled_rule();
1476        let content = r#"# Title
1477
1478<!-- toc -->
1479
1480- [What's New?](#whats-new)
1481- [C++ Guide](#c-guide)
1482
1483<!-- tocstop -->
1484
1485## What's New?
1486
1487News content.
1488
1489## C++ Guide
1490
1491C++ content.
1492"#;
1493        let ctx = create_ctx(content);
1494        let result = rule.check(&ctx).unwrap();
1495        assert!(result.is_empty(), "Should handle special characters");
1496    }
1497
1498    #[test]
1499    fn test_code_spans_in_headings() {
1500        let rule = create_enabled_rule();
1501        let content = r#"# Title
1502
1503<!-- toc -->
1504
1505- [`check [PATHS...]`](#check-paths)
1506
1507<!-- tocstop -->
1508
1509## `check [PATHS...]`
1510
1511Command documentation.
1512"#;
1513        let ctx = create_ctx(content);
1514        let result = rule.check(&ctx).unwrap();
1515        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
1516    }
1517
1518    // ========== Config Tests ==========
1519
1520    #[test]
1521    fn test_from_config_defaults() {
1522        let config = crate::config::Config::default();
1523        let rule = MD073TocValidation::from_config(&config);
1524        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1525
1526        assert_eq!(rule.min_level, 2);
1527        assert_eq!(rule.max_level, 4);
1528        assert!(rule.enforce_order);
1529        assert_eq!(rule.indent, 2);
1530    }
1531
1532    #[test]
1533    fn test_indent_from_md007_config() {
1534        use crate::config::{Config, RuleConfig};
1535        use std::collections::BTreeMap;
1536
1537        let mut config = Config::default();
1538
1539        // Set MD007 indent to 4
1540        let mut md007_values = BTreeMap::new();
1541        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1542        config.rules.insert(
1543            "MD007".to_string(),
1544            RuleConfig {
1545                severity: None,
1546                values: md007_values,
1547            },
1548        );
1549
1550        let rule = MD073TocValidation::from_config(&config);
1551        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1552
1553        assert_eq!(rule.indent, 4, "Should read indent from MD007 config");
1554    }
1555
1556    #[test]
1557    fn test_indent_md073_overrides_md007() {
1558        use crate::config::{Config, RuleConfig};
1559        use std::collections::BTreeMap;
1560
1561        let mut config = Config::default();
1562
1563        // Set MD007 indent to 4
1564        let mut md007_values = BTreeMap::new();
1565        md007_values.insert("indent".to_string(), toml::Value::Integer(4));
1566        config.rules.insert(
1567            "MD007".to_string(),
1568            RuleConfig {
1569                severity: None,
1570                values: md007_values,
1571            },
1572        );
1573
1574        // Set MD073 indent to 3 (should override MD007)
1575        let mut md073_values = BTreeMap::new();
1576        md073_values.insert("enabled".to_string(), toml::Value::Boolean(true));
1577        md073_values.insert("indent".to_string(), toml::Value::Integer(3));
1578        config.rules.insert(
1579            "MD073".to_string(),
1580            RuleConfig {
1581                severity: None,
1582                values: md073_values,
1583            },
1584        );
1585
1586        let rule = MD073TocValidation::from_config(&config);
1587        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();
1588
1589        assert_eq!(rule.indent, 3, "MD073 indent should override MD007");
1590    }
1591
1592    #[test]
1593    fn test_generate_toc_with_4_space_indent() {
1594        let mut rule = create_enabled_rule();
1595        rule.indent = 4;
1596
1597        let content = r#"<!-- toc -->
1598
1599<!-- tocstop -->
1600
1601## Level 2
1602
1603### Level 3
1604
1605#### Level 4
1606
1607## Another Level 2
1608"#;
1609        let ctx = create_ctx(content);
1610        let region = rule.detect_toc_region(&ctx).unwrap();
1611        let expected = rule.build_expected_toc(&ctx, &region);
1612        let toc = rule.generate_toc(&expected);
1613
1614        // With 4-space indent:
1615        // Level 2 = 0 spaces (base level)
1616        // Level 3 = 4 spaces
1617        // Level 4 = 8 spaces
1618        assert!(toc.contains("- [Level 2](#level-2)"), "Level 2 should have no indent");
1619        assert!(
1620            toc.contains("    - [Level 3](#level-3)"),
1621            "Level 3 should have 4-space indent"
1622        );
1623        assert!(
1624            toc.contains("        - [Level 4](#level-4)"),
1625            "Level 4 should have 8-space indent"
1626        );
1627        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
1628    }
1629
1630    #[test]
1631    fn test_validate_toc_with_4_space_indent() {
1632        let mut rule = create_enabled_rule();
1633        rule.indent = 4;
1634
1635        // TOC with correct 4-space indentation
1636        let content = r#"<!-- toc -->
1637- [Hello](#hello)
1638    - [Another](#another)
1639- [Heading](#heading)
1640<!-- tocstop -->
1641
1642## Hello
1643
1644### Another
1645
1646## Heading
1647"#;
1648        let ctx = create_ctx(content);
1649        let result = rule.check(&ctx).unwrap();
1650        assert!(
1651            result.is_empty(),
1652            "Should accept 4-space indent when configured: {result:?}"
1653        );
1654    }
1655
1656    #[test]
1657    fn test_validate_toc_wrong_indent_with_4_space_config() {
1658        let mut rule = create_enabled_rule();
1659        rule.indent = 4;
1660
1661        // TOC with 2-space indentation (wrong when 4-space is configured)
1662        let content = r#"<!-- toc -->
1663- [Hello](#hello)
1664  - [Another](#another)
1665- [Heading](#heading)
1666<!-- tocstop -->
1667
1668## Hello
1669
1670### Another
1671
1672## Heading
1673"#;
1674        let ctx = create_ctx(content);
1675        let result = rule.check(&ctx).unwrap();
1676        assert_eq!(result.len(), 1, "Should detect wrong indent");
1677        assert!(
1678            result[0].message.contains("Indentation mismatch"),
1679            "Should report indentation mismatch: {}",
1680            result[0].message
1681        );
1682        assert!(
1683            result[0].message.contains("expected 4 spaces"),
1684            "Should mention expected 4 spaces: {}",
1685            result[0].message
1686        );
1687    }
1688
1689    // ========== Markdown Stripping Tests ==========
1690
1691    #[test]
1692    fn test_strip_markdown_formatting_link() {
1693        let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1694        assert_eq!(result, "Tool: terminal");
1695    }
1696
1697    #[test]
1698    fn test_strip_markdown_formatting_bold() {
1699        let result = strip_markdown_formatting("This is **bold** text");
1700        assert_eq!(result, "This is bold text");
1701
1702        let result = strip_markdown_formatting("This is __bold__ text");
1703        assert_eq!(result, "This is bold text");
1704    }
1705
1706    #[test]
1707    fn test_strip_markdown_formatting_italic() {
1708        let result = strip_markdown_formatting("This is *italic* text");
1709        assert_eq!(result, "This is italic text");
1710
1711        let result = strip_markdown_formatting("This is _italic_ text");
1712        assert_eq!(result, "This is italic text");
1713    }
1714
1715    #[test]
1716    fn test_strip_markdown_formatting_code_span() {
1717        let result = strip_markdown_formatting("Use the `format` function");
1718        assert_eq!(result, "Use the format function");
1719    }
1720
1721    #[test]
1722    fn test_strip_markdown_formatting_image() {
1723        let result = strip_markdown_formatting("See ![logo](image.png) for details");
1724        assert_eq!(result, "See logo for details");
1725    }
1726
1727    #[test]
1728    fn test_strip_markdown_formatting_reference_link() {
1729        let result = strip_markdown_formatting("See [documentation][docs] for details");
1730        assert_eq!(result, "See documentation for details");
1731    }
1732
1733    #[test]
1734    fn test_strip_markdown_formatting_combined() {
1735        // Link is stripped first, leaving bold, then bold is stripped
1736        let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1737        assert_eq!(result, "Tool: terminal");
1738    }
1739
1740    #[test]
1741    fn test_toc_with_link_in_heading_matches_stripped_text() {
1742        let rule = create_enabled_rule();
1743
1744        // TOC entry text matches the stripped heading text
1745        let content = r#"# Title
1746
1747<!-- toc -->
1748
1749- [Tool: terminal](#tool-terminal)
1750
1751<!-- tocstop -->
1752
1753## Tool: [terminal](https://example.com)
1754
1755Content here.
1756"#;
1757        let ctx = create_ctx(content);
1758        let result = rule.check(&ctx).unwrap();
1759        assert!(
1760            result.is_empty(),
1761            "Stripped heading text should match TOC entry: {result:?}"
1762        );
1763    }
1764
1765    #[test]
1766    fn test_toc_with_simplified_text_still_mismatches() {
1767        let rule = create_enabled_rule();
1768
1769        // TOC entry "terminal" does NOT match stripped heading "Tool: terminal"
1770        let content = r#"# Title
1771
1772<!-- toc -->
1773
1774- [terminal](#tool-terminal)
1775
1776<!-- tocstop -->
1777
1778## Tool: [terminal](https://example.com)
1779
1780Content here.
1781"#;
1782        let ctx = create_ctx(content);
1783        let result = rule.check(&ctx).unwrap();
1784        assert_eq!(result.len(), 1, "Should report text mismatch");
1785        assert!(result[0].message.contains("Text mismatch"));
1786    }
1787
1788    #[test]
1789    fn test_fix_generates_stripped_toc_entries() {
1790        let rule = MD073TocValidation::new();
1791        let content = r#"# Title
1792
1793<!-- toc -->
1794
1795<!-- tocstop -->
1796
1797## Tool: [busybox](https://www.busybox.net/)
1798
1799Content.
1800
1801## Tool: [mount](https://en.wikipedia.org/wiki/Mount)
1802
1803More content.
1804"#;
1805        let ctx = create_ctx(content);
1806        let fixed = rule.fix(&ctx).unwrap();
1807
1808        // Generated TOC should have stripped text (links removed)
1809        assert!(
1810            fixed.contains("- [Tool: busybox](#tool-busybox)"),
1811            "TOC entry should have stripped link text"
1812        );
1813        assert!(
1814            fixed.contains("- [Tool: mount](#tool-mount)"),
1815            "TOC entry should have stripped link text"
1816        );
1817        // TOC entries should NOT contain the URL (the actual headings in the document still will)
1818        // Check only within the TOC region (between toc markers)
1819        let toc_start = fixed.find("<!-- toc -->").unwrap();
1820        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
1821        let toc_content = &fixed[toc_start..toc_end];
1822        assert!(
1823            !toc_content.contains("busybox.net"),
1824            "TOC should not contain URLs: {toc_content}"
1825        );
1826        assert!(
1827            !toc_content.contains("wikipedia.org"),
1828            "TOC should not contain URLs: {toc_content}"
1829        );
1830    }
1831
1832    #[test]
1833    fn test_fix_with_bold_in_heading() {
1834        let rule = MD073TocValidation::new();
1835        let content = r#"# Title
1836
1837<!-- toc -->
1838
1839<!-- tocstop -->
1840
1841## **Important** Section
1842
1843Content.
1844"#;
1845        let ctx = create_ctx(content);
1846        let fixed = rule.fix(&ctx).unwrap();
1847
1848        // Generated TOC preserves bold markers in display text; anchor strips them.
1849        assert!(fixed.contains("- [**Important** Section](#important-section)"));
1850    }
1851
/// Inline code in a heading keeps its backticks in the generated TOC display
/// text, while the expected anchor (`using-async-functions`) carries none.
#[test]
fn test_fix_with_code_in_heading() {
    // Document with an empty TOC and one heading containing a code span.
    let markdown = concat!(
        "# Title\n",
        "\n",
        "<!-- toc -->\n",
        "\n",
        "<!-- tocstop -->\n",
        "\n",
        "## Using `async` Functions\n",
        "\n",
        "Content.\n",
    );
    let validator = MD073TocValidation::new();
    let ctx = create_ctx(markdown);
    let fixed = validator.fix(&ctx).unwrap();

    let expected_entry = "- [Using `async` Functions](#using-async-functions)";
    assert!(fixed.contains(expected_entry));
}
1871
1872    // ========== Custom Anchor Tests ==========
1873
/// A TOC entry that links to a heading's explicit `{#id}` anchor must not
/// produce any warning.
#[test]
fn test_custom_anchor_id_respected() {
    // The TOC links to `#my-custom-anchor`, the id declared on the heading.
    let content = r#"# Title

<!-- toc -->

- [My Section](#my-custom-anchor)

<!-- tocstop -->

## My Section {#my-custom-anchor}

Content here.
"#;
    let rule = create_enabled_rule();
    let ctx = create_ctx(content);
    let result = rule.check(&ctx).unwrap();

    assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
}
1893
/// `fix()` must link each generated TOC entry to the explicit `{#id}` anchor
/// declared on its heading.
#[test]
fn test_custom_anchor_id_in_generated_toc() {
    let validator = create_enabled_rule();
    let markdown = concat!(
        "# Title\n",
        "\n",
        "<!-- toc -->\n",
        "\n",
        "<!-- tocstop -->\n",
        "\n",
        "## First Section {#custom-first}\n",
        "\n",
        "Content.\n",
        "\n",
        "## Second Section {#another-custom}\n",
        "\n",
        "More content.\n",
    );
    let ctx = create_ctx(markdown);
    let fixed = validator.fix(&ctx).unwrap();

    // One expected entry per heading, each pointing at the custom id.
    for entry in [
        "- [First Section](#custom-first)",
        "- [Second Section](#another-custom)",
    ] {
        assert!(fixed.contains(entry));
    }
}
1916
/// A TOC that mixes a custom `{#id}` anchor with a generated anchor must be
/// accepted without warnings.
#[test]
fn test_mixed_custom_and_generated_anchors() {
    // First heading declares `{#my-id}`; the second has no explicit id.
    let content = r#"# Title

<!-- toc -->

- [Custom Section](#my-id)
- [Normal Section](#normal-section)

<!-- tocstop -->

## Custom Section {#my-id}

Content.

## Normal Section

More content.
"#;
    let validator = create_enabled_rule();
    let ctx = create_ctx(content);
    let warnings = validator.check(&ctx).unwrap();

    assert!(warnings.is_empty(), "Should handle mixed custom and generated anchors");
}
1941
1942    // ========== Anchor Generation Tests ==========
1943
/// The expected anchor for a heading containing underscores keeps the
/// underscores and is lowercased.
#[test]
fn test_github_anchor_style() {
    let markdown = "<!-- toc -->\n\n<!-- tocstop -->\n\n## Test_With_Underscores\n\nContent.\n";
    let validator = create_enabled_rule();
    let ctx = create_ctx(markdown);

    // Build the expected TOC directly from the detected region.
    let toc_region = validator.detect_toc_region(&ctx).unwrap();
    let entries = validator.build_expected_toc(&ctx, &toc_region);

    // Underscores are preserved in the anchor.
    assert_eq!(entries[0].anchor, "test_with_underscores");
}
1963
1964    // ========== Stress Tests ==========
1965
/// Stress test: an empty TOC followed by 150 headings must be checked and
/// fixed without panicking.
#[test]
fn test_stress_many_headings() {
    let validator = create_enabled_rule();

    // Empty TOC followed by 150 level-2 headings, each with a body paragraph.
    let sections: String = (1..=150)
        .map(|i| format!("## Heading Number {i}\n\nContent for section {i}.\n\n"))
        .collect();
    let content = format!("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n{sections}");
    let ctx = create_ctx(&content);

    // All missing headings are reported through a single warning for the TOC.
    let warnings = validator.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should report single warning for TOC");
    assert!(warnings[0].message.contains("Missing entry"));

    // Spot-check the first, a middle, and the last generated entry.
    let fixed = validator.fix(&ctx).unwrap();
    for n in [1, 100, 150] {
        let entry = format!("- [Heading Number {n}](#heading-number-{n})");
        assert!(fixed.contains(entry.as_str()));
    }
}
1992
/// Four groups of level 2/3/4 headings must produce TOC entries indented by
/// zero, two, and four spaces respectively.
#[test]
fn test_stress_deeply_nested() {
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Level 2 A

### Level 3 A

#### Level 4 A

## Level 2 B

### Level 3 B

#### Level 4 B

## Level 2 C

### Level 3 C

#### Level 4 C

## Level 2 D

### Level 3 D

#### Level 4 D
"#;
    let validator = create_enabled_rule();
    let ctx = create_ctx(content);
    let fixed = validator.fix(&ctx).unwrap();

    // First and last group, at each nesting depth.
    let expected_entries = [
        "- [Level 2 A](#level-2-a)",
        "  - [Level 3 A](#level-3-a)",
        "    - [Level 4 A](#level-4-a)",
        "- [Level 2 D](#level-2-d)",
        "  - [Level 3 D](#level-3-d)",
        "    - [Level 4 D](#level-4-d)",
    ];
    for entry in expected_entries {
        assert!(fixed.contains(entry));
    }
}
2037
2038    // ==================== Duplicate TOC anchors ====================
2039
/// A TOC that repeats `#example` for two same-named headings must be
/// reported as mismatched.
#[test]
fn test_duplicate_toc_anchors_produce_correct_diagnostics() {
    // Headings are "Example", "Another", "Example"; the expected anchors are
    // "example", "another", "example-1", but the TOC uses `#example` twice.
    let content = r#"# Document

<!-- toc -->

- [Example](#example)
- [Another](#another)
- [Example](#example)

<!-- tocstop -->

## Example
First.

## Another
Middle.

## Example
Second.
"#;
    let validator = create_enabled_rule();
    let ctx = create_ctx(content);
    let warnings = validator.check(&ctx).unwrap();

    assert!(!warnings.is_empty(), "Should detect mismatch with duplicate TOC anchors");

    // The first warning must describe either a missing or a stale entry.
    let first_message = &warnings[0].message;
    let mentions_mismatch = ["Missing entry", "Stale entry"]
        .iter()
        .any(|needle| first_message.contains(*needle));
    assert!(
        mentions_mismatch,
        "Should report missing or stale entries for duplicate anchors. Got: {}",
        first_message
    );
}
2077
2078    // ==================== Multi-backtick code spans ====================
2079
/// `strip_markdown_formatting` removes the double-backtick delimiters and
/// keeps the single backtick inside the span.
#[test]
fn test_strip_double_backtick_code_span() {
    let input = "Using ``code with ` backtick``";
    let stripped = strip_markdown_formatting(input);

    assert_eq!(
        stripped, "Using code with ` backtick",
        "Should strip double-backtick code spans"
    );
}
2089
/// `strip_markdown_formatting` removes the triple-backtick delimiters and
/// keeps the double backtick inside the span.
#[test]
fn test_strip_triple_backtick_code_span() {
    let input = "Using ```code with `` backticks```";
    let stripped = strip_markdown_formatting(input);

    assert_eq!(
        stripped, "Using code with `` backticks",
        "Should strip triple-backtick code spans"
    );
}
2099
/// A heading with a double-backtick code span: `fix()` keeps the span in the
/// TOC display text, and `check()` accepts the fixed document.
#[test]
fn test_toc_with_double_backtick_heading() {
    // The heading's code span is ``code with ` backtick``.
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Using ``code with ` backtick``

Content here.
"#;
    let rule = create_enabled_rule();
    let fixed = {
        let ctx = create_ctx(content);
        rule.fix(&ctx).unwrap()
    };

    // Inspect only the text between the TOC markers.
    let toc_content = {
        let start = fixed.find("<!-- toc -->").unwrap();
        let end = fixed.find("<!-- tocstop -->").unwrap();
        &fixed[start..end]
    };
    assert!(
        toc_content.contains("``code with ` backtick``"),
        "Fix should preserve double-backtick code span in TOC display text. Got: {toc_content}"
    );

    // The fixed document must itself be warning-free.
    let fixed_ctx = create_ctx(&fixed);
    let result = rule.check(&fixed_ctx).unwrap();
    assert!(
        result.is_empty(),
        "check() should not warn on fixed output. Warnings: {result:?}"
    );
}
2138
/// Fifty identically named headings must yield fifty expected TOC entries
/// whose anchors are `faq`, `faq-1`, … `faq-49`.
#[test]
fn test_stress_many_duplicates() {
    let validator = create_enabled_rule();

    // Empty TOC followed by the same "FAQ" section repeated 50 times.
    let content = format!(
        "# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n{}",
        "## FAQ\n\nContent.\n\n".repeat(50)
    );
    let ctx = create_ctx(&content);
    let toc_region = validator.detect_toc_region(&ctx).unwrap();
    let entries = validator.build_expected_toc(&ctx, &toc_region);

    assert_eq!(entries.len(), 50);

    // First, second, and last anchors pin the numbering scheme.
    for (index, anchor) in [(0, "faq"), (1, "faq-1"), (49, "faq-49")] {
        assert_eq!(entries[index].anchor, anchor);
    }
}
2159
/// Round-trip invariants of `fix()` across several kinds of TOC mismatch:
/// running `fix()` on its own output changes nothing, and `check()` on the
/// fixed document reports no warnings.
#[test]
fn test_roundtrip_check_and_fix_alignment() {
    let rule = create_enabled_rule();

    // In order: stale entry, missing entries, text mismatch, already correct.
    let inputs = [
        "# Title\n\n<!-- toc -->\n- [Old Section](#old-section)\n<!-- tocstop -->\n\n## New Section\n",
        "# Title\n\n<!-- toc -->\n<!-- tocstop -->\n\n## One\n\n## Two\n",
        "# Title\n\n<!-- toc -->\n- [Wrong Text](#real-section)\n<!-- tocstop -->\n\n## Real Section\n",
        "# Title\n\n<!-- toc -->\n- [One](#one)\n- [Two](#two)\n<!-- tocstop -->\n\n## One\n\n## Two\n",
    ];

    for input in inputs.iter() {
        let original_ctx = create_ctx(input);
        let fixed = rule.fix(&original_ctx).unwrap();

        // Second pass over the already-fixed document.
        let fixed_ctx = create_ctx(&fixed);
        let fixed_twice = rule.fix(&fixed_ctx).unwrap();

        // Idempotency: fix(fix(x)) == fix(x).
        assert_eq!(
            fixed, fixed_twice,
            "fix() is not idempotent for input: {input:?}\nfirst:  {fixed:?}\nsecond: {fixed_twice:?}"
        );

        // The fixed document must be warning-free.
        let warnings_after = rule.check(&fixed_ctx).unwrap();
        assert!(
            warnings_after.is_empty(),
            "check() should return no warnings after fix() for input: {input:?}\nfixed: {fixed:?}\nwarnings: {warnings_after:?}"
        );
    }
}
2197
/// When the TOC already matches the headings, `check()` reports nothing and
/// `fix()` returns the input unchanged.
#[test]
fn test_no_mismatch_preserves_content() {
    // TOC lists exactly the two headings that follow it.
    let content = concat!(
        "# Title\n",
        "\n",
        "<!-- toc -->\n",
        "- [First Section](#first-section)\n",
        "- [Second Section](#second-section)\n",
        "<!-- tocstop -->\n",
        "\n",
        "## First Section\n",
        "\n",
        "text\n",
        "\n",
        "## Second Section\n",
        "\n",
        "text\n",
    );
    let validator = create_enabled_rule();
    let ctx = create_ctx(content);

    let warnings = validator.check(&ctx).unwrap();
    assert!(warnings.is_empty(), "No mismatches should emit no warnings");

    let fixed = validator.fix(&ctx).unwrap();
    assert_eq!(fixed, content, "Content should be unchanged when TOC matches headings");
}
2213
/// `fix()` must leave a TOC untouched when it sits between
/// `rumdl-disable MD073` and `rumdl-enable MD073` comments.
#[test]
fn test_inline_disable_preserves_toc() {
    // The TOC entry is stale ("Stale" vs. the real heading "Real"), but the
    // whole TOC region is wrapped in disable/enable comments.
    let content = concat!(
        "# Title\n",
        "\n",
        "<!-- rumdl-disable MD073 -->\n",
        "<!-- toc -->\n",
        "- [Stale](#stale)\n",
        "<!-- tocstop -->\n",
        "<!-- rumdl-enable MD073 -->\n",
        "\n",
        "## Real\n",
    );
    let validator = create_enabled_rule();
    let ctx = create_ctx(content);
    let fixed = validator.fix(&ctx).unwrap();

    assert_eq!(fixed, content, "TOC in a disabled region should be preserved exactly");
}
2226
2227    // ========== Inline Formatting Preservation Tests (#634) ==========
2228
/// A heading made entirely of a code span keeps its backticks in the
/// generated TOC display text; the expected anchor (`my-header`) has none.
#[test]
fn test_fix_code_ticks_preserved_in_toc_display_text() {
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

### `my header`

Content.
"#;
    let validator = MD073TocValidation::new();
    let ctx = create_ctx(content);
    let fixed = validator.fix(&ctx).unwrap();

    let expected_entry = "- [`my header`](#my-header)";
    assert!(
        fixed.contains(expected_entry),
        "Code ticks must be preserved in TOC display text. Got: {fixed}"
    );
}
2253
/// A hand-written TOC entry whose display text keeps the heading's code
/// ticks must pass `check()` without warnings.
#[test]
fn test_validate_toc_with_code_ticks_is_valid() {
    let content = r#"# Title

<!-- toc -->

- [`my header`](#my-header)

<!-- tocstop -->

## `my header`

Content.
"#;
    let validator = create_enabled_rule();
    let ctx = create_ctx(content);
    let result = validator.check(&ctx).unwrap();

    assert!(
        result.is_empty(),
        "A TOC entry with preserved code ticks should be accepted as valid: {result:?}"
    );
}
2277
/// Bold and italic markers in a heading are kept in the generated TOC
/// display text; the expected anchor (`bold-and-italic`) has none.
#[test]
fn test_fix_emphasis_preserved_in_toc_display_text() {
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## **bold** and *italic*

Content.
"#;
    let validator = MD073TocValidation::new();
    let ctx = create_ctx(content);
    let fixed = validator.fix(&ctx).unwrap();

    let expected_entry = "- [**bold** and *italic*](#bold-and-italic)";
    assert!(
        fixed.contains(expected_entry),
        "Emphasis markers must be preserved in TOC display text. Got: {fixed}"
    );
}
2302
/// A link inside a heading is reduced to its link text in the generated TOC
/// entry, and its URL must not appear anywhere in the TOC (nested links are
/// invalid in Markdown).
#[test]
fn test_fix_link_in_heading_is_stripped() {
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## See [docs](http://example.com) for details

Content.
"#;
    let validator = MD073TocValidation::new();
    let ctx = create_ctx(content);
    let fixed = validator.fix(&ctx).unwrap();

    let expected_entry = "- [See docs for details](#see-docs-for-details)";
    assert!(
        fixed.contains(expected_entry),
        "Link must be stripped from TOC display text. Got: {fixed}"
    );

    // The heading itself still holds the URL, so look only between the markers.
    let toc_content = {
        let start = fixed.find("<!-- toc -->").unwrap();
        let end = fixed.find("<!-- tocstop -->").unwrap();
        &fixed[start..end]
    };
    assert!(
        !toc_content.contains("http://example.com"),
        "TOC should not contain link URL: {toc_content}"
    );
}
2334
/// An image inside a heading is reduced to its alt text in the generated TOC
/// entry.
#[test]
fn test_fix_image_in_heading_is_stripped() {
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Section ![icon](icon.png) Title

Content.
"#;
    let validator = MD073TocValidation::new();
    let ctx = create_ctx(content);
    let fixed = validator.fix(&ctx).unwrap();

    // `![icon](icon.png)` contributes only "icon" to the entry.
    let expected_entry = "- [Section icon Title](#section-icon-title)";
    assert!(
        fixed.contains(expected_entry),
        "Image must be stripped from TOC display text. Got: {fixed}"
    );
}
2357
/// For headings containing a code span, bold text, and a link, a second
/// `fix()` pass must reproduce the first pass's output, and `check()` on that
/// output must be clean.
#[test]
fn test_fix_idempotent_with_inline_formatting() {
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## `code` heading

### **bold** heading

## See [link](http://x.com)

"#;
    let validator = MD073TocValidation::new();

    let first_pass = {
        let ctx = create_ctx(content);
        validator.fix(&ctx).unwrap()
    };
    let first_pass_ctx = create_ctx(&first_pass);
    let second_pass = validator.fix(&first_pass_ctx).unwrap();

    assert_eq!(
        first_pass, second_pass,
        "fix() must be idempotent for inline-formatted headings"
    );

    // The once-fixed document must also be warning-free.
    let warnings = validator.check(&first_pass_ctx).unwrap();
    assert!(
        warnings.is_empty(),
        "check() must not warn after fix() for inline-formatted headings: {warnings:?}"
    );
}
2390
/// Link-like syntax inside a code span must not be stripped, because it is
/// literal content of the code span and not a real Markdown link.
///
/// Only the display text is asserted; the exact anchor is deliberately left
/// unchecked here.
#[test]
fn test_link_inside_code_span_preserved_in_toc() {
    let rule = MD073TocValidation::new();
    let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Use `[foo](bar)` syntax

Content.
"#;
    let ctx = create_ctx(content);
    let fixed = rule.fix(&ctx).unwrap();

    // Look only at the text between the TOC markers.
    let toc_start = fixed.find("<!-- toc -->").unwrap();
    let toc_end = fixed.find("<!-- tocstop -->").unwrap();
    let toc_content = &fixed[toc_start..toc_end];

    // The code span `[foo](bar)` must survive intact in the TOC display text.
    assert!(
        toc_content.contains("Use `[foo](bar)` syntax"),
        "Link-like text inside code span must be preserved in TOC display text. Got: {toc_content}"
    );

    // The fixture contains no real link, so this cannot exercise link
    // stripping; it only guards against a URL scheme appearing in the TOC.
    assert!(
        !toc_content.contains("http://"),
        "TOC must not contain a URL scheme (the heading has no real link): {toc_content}"
    );
}
2427}