1use crate::lint_context::LintContext;
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::anchor_styles::AnchorStyle;
8use regex::Regex;
9use std::collections::HashMap;
10use std::sync::LazyLock;
11
/// Matches the opening TOC marker comment, e.g. `<!-- toc -->`.
///
/// Case-insensitive; whitespace between the comment delimiters and `toc` is optional.
static TOC_START_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*toc\s*-->").unwrap());

/// Matches the closing TOC marker comment: `<!-- tocstop -->` or `<!-- /toc -->`.
///
/// Case-insensitive; whitespace between the comment delimiters and the keyword is optional.
static TOC_STOP_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)<!--\s*(?:tocstop|/toc)\s*-->").unwrap());

/// Matches one TOC list item of the form `- [text](#anchor)` (bullet `-` or `*`).
///
/// Capture groups:
/// 1. leading whitespace before the bullet,
/// 2. the link text (may contain one level of nested `[...]`),
/// 3. the anchor, without the leading `#`.
static TOC_ENTRY_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)[-*]\s+\[([^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*)\]\(#([^)]+)\)").unwrap());
23
/// Location of the TOC body found between a start and a stop marker.
#[derive(Debug, Clone)]
struct TocRegion {
    /// 1-indexed line directly after the start marker.
    start_line: usize,
    /// 1-indexed line directly before the stop marker; for an empty region it equals `start_line`.
    end_line: usize,
    /// Byte offset of the line directly after the start marker (start of the replaceable span).
    content_start: usize,
    /// Byte offset of the stop-marker line (end of the replaceable span, exclusive);
    /// for an empty region it equals `content_start`.
    content_end: usize,
}
36
/// One list item parsed out of the existing TOC via `TOC_ENTRY_PATTERN`.
#[derive(Debug, Clone)]
struct TocEntry {
    /// Link text exactly as written between the square brackets.
    text: String,
    /// Link target without the leading `#`.
    anchor: String,
    /// Byte length of the whitespace preceding the bullet.
    indent_spaces: usize,
}
47
/// One TOC entry that the document's headings say should exist.
#[derive(Debug, Clone)]
struct ExpectedTocEntry {
    /// 1-indexed line of the heading in the document.
    heading_line: usize,
    /// Heading level of that heading.
    level: u8,
    /// Heading text as recorded on the heading (inline formatting not yet stripped).
    text: String,
    /// Anchor for the heading: its custom id or a generated fragment, with a numeric
    /// suffix appended for repeated anchors.
    anchor: String,
}
60
/// A single discrepancy between the TOC as written and the TOC the headings imply.
#[derive(Debug)]
enum TocMismatch {
    /// The TOC links to an anchor that no expected entry has.
    StaleEntry { entry: TocEntry },
    /// An expected entry whose anchor does not appear in the TOC.
    MissingEntry { expected: ExpectedTocEntry },
    /// Anchors match, but the visible text differs after stripping inline formatting.
    TextMismatch {
        entry: TocEntry,
        expected: ExpectedTocEntry,
    },
    /// The entry appears out of document order; `expected_position` is 1-indexed.
    OrderMismatch { entry: TocEntry, expected_position: usize },
    /// The entry's leading whitespace does not match its heading's nesting depth.
    IndentationMismatch {
        entry: TocEntry,
        actual_indent: usize,
        expected_indent: usize,
    },
}
82
/// Inline link `[text](url)`; group 1 is the link text.
static MARKDOWN_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\([^)]+\)").unwrap());
/// Reference link `[text][ref]` (the reference may be empty); group 1 is the link text.
static MARKDOWN_REF_LINK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[([^\]]+)\]\[[^\]]*\]").unwrap());
/// Image `![alt](url)`; group 1 is the alt text (may be empty).
static MARKDOWN_IMAGE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap());
/// Code span delimited by single backticks; group 1 is the code.
static MARKDOWN_CODE_SPAN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"`([^`]+)`").unwrap());
/// Bold written as `**text**`; group 1 is the text.
static MARKDOWN_BOLD_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*([^*]+)\*\*").unwrap());
/// Bold written as `__text__`; group 1 is the text.
static MARKDOWN_BOLD_UNDERSCORE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"__([^_]+)__").unwrap());
/// Italic written as `*text*`; group 1 is the text.
static MARKDOWN_ITALIC_ASTERISK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*([^*]+)\*").unwrap());
/// Italic written as `_text_`, only when the underscores are not adjacent to ASCII
/// alphanumerics (so `snake_case_name` is left alone).
///
/// Group 1 is the character (or start of input) before the span, group 2 the text,
/// group 3 the character (or end of input) after it. The boundary characters are part
/// of the match and must be re-emitted by the replacement.
static MARKDOWN_ITALIC_UNDERSCORE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(^|[^a-zA-Z0-9])_([^_]+)_([^a-zA-Z0-9]|$)").unwrap());
95
96fn strip_markdown_formatting(text: &str) -> String {
105 let mut result = text.to_string();
106
107 result = MARKDOWN_IMAGE.replace_all(&result, "$1").to_string();
109
110 result = MARKDOWN_LINK.replace_all(&result, "$1").to_string();
112
113 result = MARKDOWN_REF_LINK.replace_all(&result, "$1").to_string();
115
116 result = MARKDOWN_CODE_SPAN.replace_all(&result, "$1").to_string();
118
119 result = MARKDOWN_BOLD_ASTERISK.replace_all(&result, "$1").to_string();
121 result = MARKDOWN_BOLD_UNDERSCORE.replace_all(&result, "$1").to_string();
122
123 result = MARKDOWN_ITALIC_ASTERISK.replace_all(&result, "$1").to_string();
125 result = MARKDOWN_ITALIC_UNDERSCORE.replace_all(&result, "$1$2$3").to_string();
127
128 result
129}
130
/// MD073: checks that a marker-delimited Table of Contents matches the document's headings.
#[derive(Clone)]
pub struct MD073TocValidation {
    /// Whether the rule runs at all; `should_skip` returns `true` when this is `false`.
    enabled: bool,
    /// Lowest heading level (inclusive) that belongs in the TOC.
    min_level: u8,
    /// Highest heading level (inclusive) that belongs in the TOC.
    max_level: u8,
    /// Whether TOC entries must appear in the same order as the headings.
    enforce_order: bool,
}
167
168impl Default for MD073TocValidation {
169 fn default() -> Self {
170 Self {
171 enabled: false, min_level: 2,
173 max_level: 4,
174 enforce_order: true,
175 }
176 }
177}
178
179impl std::fmt::Debug for MD073TocValidation {
180 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181 f.debug_struct("MD073TocValidation")
182 .field("enabled", &self.enabled)
183 .field("min_level", &self.min_level)
184 .field("max_level", &self.max_level)
185 .field("enforce_order", &self.enforce_order)
186 .finish()
187 }
188}
189
190impl MD073TocValidation {
191 pub fn new() -> Self {
193 Self::default()
194 }
195
196 fn detect_by_markers(&self, ctx: &LintContext) -> Option<TocRegion> {
198 let mut start_line = None;
199 let mut start_byte = None;
200
201 for (idx, line_info) in ctx.lines.iter().enumerate() {
202 let line_num = idx + 1;
203 let content = line_info.content(ctx.content);
204
205 if line_info.in_code_block || line_info.in_front_matter {
207 continue;
208 }
209
210 if let (Some(s_line), Some(s_byte)) = (start_line, start_byte) {
212 if TOC_STOP_MARKER.is_match(content) {
214 let end_line = line_num - 1;
215 let content_end = line_info.byte_offset;
216
217 if end_line < s_line {
219 return Some(TocRegion {
220 start_line: s_line,
221 end_line: s_line,
222 content_start: s_byte,
223 content_end: s_byte,
224 });
225 }
226
227 return Some(TocRegion {
228 start_line: s_line,
229 end_line,
230 content_start: s_byte,
231 content_end,
232 });
233 }
234 } else if TOC_START_MARKER.is_match(content) {
235 if idx + 1 < ctx.lines.len() {
237 start_line = Some(line_num + 1);
238 start_byte = Some(ctx.lines[idx + 1].byte_offset);
239 }
240 }
241 }
242
243 None
244 }
245
246 fn detect_toc_region(&self, ctx: &LintContext) -> Option<TocRegion> {
248 self.detect_by_markers(ctx)
249 }
250
251 fn extract_toc_entries(&self, ctx: &LintContext, region: &TocRegion) -> Vec<TocEntry> {
253 let mut entries = Vec::new();
254
255 for idx in (region.start_line - 1)..region.end_line.min(ctx.lines.len()) {
256 let line_info = &ctx.lines[idx];
257 let content = line_info.content(ctx.content);
258
259 if let Some(caps) = TOC_ENTRY_PATTERN.captures(content) {
260 let indent_spaces = caps.get(1).map_or(0, |m| m.as_str().len());
261 let text = caps.get(2).map_or("", |m| m.as_str()).to_string();
262 let anchor = caps.get(3).map_or("", |m| m.as_str()).to_string();
263
264 entries.push(TocEntry {
265 text,
266 anchor,
267 indent_spaces,
268 });
269 }
270 }
271
272 entries
273 }
274
275 fn build_expected_toc(&self, ctx: &LintContext, toc_region: &TocRegion) -> Vec<ExpectedTocEntry> {
277 let mut entries = Vec::new();
278 let mut fragment_counts: HashMap<String, usize> = HashMap::new();
279
280 for (idx, line_info) in ctx.lines.iter().enumerate() {
281 let line_num = idx + 1;
282
283 if line_num <= toc_region.end_line {
285 continue;
287 }
288
289 if line_info.in_code_block || line_info.in_front_matter || line_info.in_html_block {
291 continue;
292 }
293
294 if let Some(heading) = &line_info.heading {
295 if heading.level < self.min_level || heading.level > self.max_level {
297 continue;
298 }
299
300 let base_anchor = if let Some(custom_id) = &heading.custom_id {
302 custom_id.clone()
303 } else {
304 AnchorStyle::GitHub.generate_fragment(&heading.text)
305 };
306
307 let anchor = if let Some(count) = fragment_counts.get_mut(&base_anchor) {
309 let suffix = *count;
310 *count += 1;
311 format!("{base_anchor}-{suffix}")
312 } else {
313 fragment_counts.insert(base_anchor.clone(), 1);
314 base_anchor
315 };
316
317 entries.push(ExpectedTocEntry {
318 heading_line: line_num,
319 level: heading.level,
320 text: heading.text.clone(),
321 anchor,
322 });
323 }
324 }
325
326 entries
327 }
328
329 fn validate_toc(&self, actual: &[TocEntry], expected: &[ExpectedTocEntry]) -> Vec<TocMismatch> {
331 let mut mismatches = Vec::new();
332
333 let expected_anchors: HashMap<&str, &ExpectedTocEntry> =
335 expected.iter().map(|e| (e.anchor.as_str(), e)).collect();
336
337 let actual_anchors: HashMap<&str, &TocEntry> = actual.iter().map(|e| (e.anchor.as_str(), e)).collect();
339
340 for entry in actual {
342 if !expected_anchors.contains_key(entry.anchor.as_str()) {
343 mismatches.push(TocMismatch::StaleEntry { entry: entry.clone() });
344 }
345 }
346
347 for exp in expected {
349 if !actual_anchors.contains_key(exp.anchor.as_str()) {
350 mismatches.push(TocMismatch::MissingEntry { expected: exp.clone() });
351 }
352 }
353
354 for entry in actual {
356 if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
357 let actual_stripped = strip_markdown_formatting(entry.text.trim());
359 let expected_stripped = strip_markdown_formatting(exp.text.trim());
360 if actual_stripped != expected_stripped {
361 mismatches.push(TocMismatch::TextMismatch {
362 entry: entry.clone(),
363 expected: (*exp).clone(),
364 });
365 }
366 }
367 }
368
369 if !expected.is_empty() {
372 let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
373
374 for entry in actual {
375 if let Some(exp) = expected_anchors.get(entry.anchor.as_str()) {
376 let level_diff = exp.level.saturating_sub(base_level) as usize;
377 let expected_indent = level_diff * 2;
378
379 if entry.indent_spaces != expected_indent {
380 let already_reported = mismatches.iter().any(|m| match m {
382 TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
383 TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
384 _ => false,
385 });
386 if !already_reported {
387 mismatches.push(TocMismatch::IndentationMismatch {
388 entry: entry.clone(),
389 actual_indent: entry.indent_spaces,
390 expected_indent,
391 });
392 }
393 }
394 }
395 }
396 }
397
398 if self.enforce_order && !actual.is_empty() && !expected.is_empty() {
400 let expected_order: Vec<&str> = expected.iter().map(|e| e.anchor.as_str()).collect();
401
402 let mut expected_idx = 0;
404 for entry in actual {
405 if !expected_anchors.contains_key(entry.anchor.as_str()) {
407 continue;
408 }
409
410 while expected_idx < expected_order.len() && expected_order[expected_idx] != entry.anchor {
412 expected_idx += 1;
413 }
414
415 if expected_idx >= expected_order.len() {
416 let correct_pos = expected_order.iter().position(|a| *a == entry.anchor).unwrap_or(0);
418 let already_reported = mismatches.iter().any(|m| match m {
420 TocMismatch::StaleEntry { entry: e } => e.anchor == entry.anchor,
421 TocMismatch::TextMismatch { entry: e, .. } => e.anchor == entry.anchor,
422 _ => false,
423 });
424 if !already_reported {
425 mismatches.push(TocMismatch::OrderMismatch {
426 entry: entry.clone(),
427 expected_position: correct_pos + 1,
428 });
429 }
430 } else {
431 expected_idx += 1;
432 }
433 }
434 }
435
436 mismatches
437 }
438
439 fn generate_toc(&self, expected: &[ExpectedTocEntry]) -> String {
441 if expected.is_empty() {
442 return String::new();
443 }
444
445 let mut result = String::new();
446 let base_level = expected.iter().map(|e| e.level).min().unwrap_or(2);
447
448 for entry in expected {
449 let level_diff = entry.level.saturating_sub(base_level) as usize;
450 let indent = " ".repeat(level_diff);
451
452 let display_text = strip_markdown_formatting(&entry.text);
454 result.push_str(&format!("{indent}- [{display_text}](#{})\n", entry.anchor));
455 }
456
457 result
458 }
459}
460
461impl Rule for MD073TocValidation {
    /// Rule identifier: `MD073`.
    fn name(&self) -> &'static str {
        "MD073"
    }
465
    /// One-line description of the rule.
    fn description(&self) -> &'static str {
        "Table of Contents should match document headings"
    }
469
470 fn should_skip(&self, ctx: &LintContext) -> bool {
471 if !self.enabled {
473 return true;
474 }
475
476 let has_toc_marker = ctx.content.contains("<!-- toc") || ctx.content.contains("<!--toc");
478 !has_toc_marker
479 }
480
481 fn check(&self, ctx: &LintContext) -> LintResult {
482 let mut warnings = Vec::new();
483
484 let Some(region) = self.detect_toc_region(ctx) else {
486 return Ok(warnings);
488 };
489
490 let actual_entries = self.extract_toc_entries(ctx, ®ion);
492
493 let expected_entries = self.build_expected_toc(ctx, ®ion);
495
496 if expected_entries.is_empty() && actual_entries.is_empty() {
498 return Ok(warnings);
499 }
500
501 let mismatches = self.validate_toc(&actual_entries, &expected_entries);
503
504 if !mismatches.is_empty() {
505 let mut details = Vec::new();
507
508 for mismatch in &mismatches {
509 match mismatch {
510 TocMismatch::StaleEntry { entry } => {
511 details.push(format!("Stale entry: '{}' (heading no longer exists)", entry.text));
512 }
513 TocMismatch::MissingEntry { expected } => {
514 details.push(format!(
515 "Missing entry: '{}' (line {})",
516 expected.text, expected.heading_line
517 ));
518 }
519 TocMismatch::TextMismatch { entry, expected } => {
520 details.push(format!(
521 "Text mismatch: TOC has '{}', heading is '{}'",
522 entry.text, expected.text
523 ));
524 }
525 TocMismatch::OrderMismatch {
526 entry,
527 expected_position,
528 } => {
529 details.push(format!(
530 "Order mismatch: '{}' should be at position {}",
531 entry.text, expected_position
532 ));
533 }
534 TocMismatch::IndentationMismatch {
535 entry,
536 actual_indent,
537 expected_indent,
538 ..
539 } => {
540 details.push(format!(
541 "Indentation mismatch: '{}' has {} spaces, expected {} spaces",
542 entry.text, actual_indent, expected_indent
543 ));
544 }
545 }
546 }
547
548 let message = format!(
549 "Table of Contents does not match document headings: {}",
550 details.join("; ")
551 );
552
553 let new_toc = self.generate_toc(&expected_entries);
555 let fix_range = region.content_start..region.content_end;
556
557 warnings.push(LintWarning {
558 rule_name: Some(self.name().to_string()),
559 message,
560 line: region.start_line,
561 column: 1,
562 end_line: region.end_line,
563 end_column: 1,
564 severity: Severity::Warning,
565 fix: Some(Fix {
566 range: fix_range,
567 replacement: new_toc,
568 }),
569 });
570 }
571
572 Ok(warnings)
573 }
574
575 fn fix(&self, ctx: &LintContext) -> Result<String, LintError> {
576 let Some(region) = self.detect_toc_region(ctx) else {
578 return Ok(ctx.content.to_string());
580 };
581
582 let expected_entries = self.build_expected_toc(ctx, ®ion);
584
585 let new_toc = self.generate_toc(&expected_entries);
587
588 let mut result = String::with_capacity(ctx.content.len());
590 result.push_str(&ctx.content[..region.content_start]);
591 result.push_str(&new_toc);
592 result.push_str(&ctx.content[region.content_end..]);
593
594 Ok(result)
595 }
596
    /// This rule is filed under `RuleCategory::Other`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Other
    }
600
    /// Exposes the rule as `&dyn Any` so callers can downcast to `MD073TocValidation`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
604
605 fn default_config_section(&self) -> Option<(String, toml::Value)> {
606 let value: toml::Value = toml::from_str(
607 r#"
608# Whether this rule is enabled (opt-in, disabled by default)
609enabled = false
610# Minimum heading level to include
611min-level = 2
612# Maximum heading level to include
613max-level = 4
614# Whether TOC order must match document order
615enforce-order = true
616"#,
617 )
618 .ok()?;
619 Some(("MD073".to_string(), value))
620 }
621
622 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
623 where
624 Self: Sized,
625 {
626 let mut rule = MD073TocValidation::default();
627
628 if let Some(rule_config) = config.rules.get("MD073") {
629 if let Some(enabled) = rule_config.values.get("enabled").and_then(|v| v.as_bool()) {
631 rule.enabled = enabled;
632 }
633
634 if let Some(min_level) = rule_config.values.get("min-level").and_then(|v| v.as_integer()) {
636 rule.min_level = (min_level.clamp(1, 6)) as u8;
637 }
638
639 if let Some(max_level) = rule_config.values.get("max-level").and_then(|v| v.as_integer()) {
641 rule.max_level = (max_level.clamp(1, 6)) as u8;
642 }
643
644 if let Some(enforce_order) = rule_config.values.get("enforce-order").and_then(|v| v.as_bool()) {
646 rule.enforce_order = enforce_order;
647 }
648 }
649
650 Box::new(rule)
651 }
652}
653
654#[cfg(test)]
655mod tests {
656 use super::*;
657 use crate::config::MarkdownFlavor;
658
    /// Builds a `LintContext` for `content` using the standard Markdown flavor.
    fn create_ctx(content: &str) -> LintContext<'_> {
        LintContext::new(content, MarkdownFlavor::Standard, None)
    }
662
    /// Returns the rule with default settings except `enabled`, which is switched on.
    fn create_enabled_rule() -> MD073TocValidation {
        MD073TocValidation {
            enabled: true,
            ..MD073TocValidation::default()
        }
    }
670
    // The region spans the lines strictly between the two markers (lines 4 through 6).
    #[test]
    fn test_detect_markers_basic() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)

<!-- tocstop -->

## Heading 1

Content here.
"#;
        let ctx = create_ctx(content);
        let region = rule.detect_by_markers(&ctx);
        assert!(region.is_some());
        let region = region.unwrap();
        // Start marker is on line 3, stop marker on line 7.
        assert_eq!(region.start_line, 4);
        assert_eq!(region.end_line, 6);
    }
696
    // Marker spellings: no inner whitespace, upper case, and the `/toc` closing form.
    #[test]
    fn test_detect_markers_variations() {
        let rule = MD073TocValidation::new();

        // No whitespace inside the comment.
        let content1 = "<!--toc-->\n- [A](#a)\n<!--tocstop-->\n";
        let ctx1 = create_ctx(content1);
        assert!(rule.detect_by_markers(&ctx1).is_some());

        // Upper-case keywords.
        let content2 = "<!-- TOC -->\n- [A](#a)\n<!-- TOCSTOP -->\n";
        let ctx2 = create_ctx(content2);
        assert!(rule.detect_by_markers(&ctx2).is_some());

        // `<!-- /toc -->` as the stop marker.
        let content3 = "<!-- toc -->\n- [A](#a)\n<!-- /toc -->\n";
        let ctx3 = create_ctx(content3);
        assert!(rule.detect_by_markers(&ctx3).is_some());
    }
716
    // A document without markers has no TOC region.
    #[test]
    fn test_no_toc_region() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

## Heading 1

Content here.

## Heading 2

More content.
"#;
        let ctx = create_ctx(content);
        let region = rule.detect_toc_region(&ctx);
        assert!(region.is_none());
    }
734
    // A TOC that lists every level-2 heading in order produces no warnings.
    #[test]
    fn test_toc_matches_headings() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)
- [Heading 2](#heading-2)

<!-- tocstop -->

## Heading 1

Content.

## Heading 2

More content.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Expected no warnings for matching TOC");
    }
761
    // A heading that the TOC does not link to is reported as a missing entry.
    #[test]
    fn test_missing_entry() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)

<!-- tocstop -->

## Heading 1

Content.

## Heading 2

New heading not in TOC.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Missing entry"));
        assert!(result[0].message.contains("Heading 2"));
    }
787
    // A TOC link whose anchor matches no heading is reported as a stale entry.
    #[test]
    fn test_stale_entry() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)
- [Deleted Heading](#deleted-heading)

<!-- tocstop -->

## Heading 1

Content.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Stale entry"));
        assert!(result[0].message.contains("Deleted Heading"));
    }
810
    // Correct anchor but different link text is reported as a text mismatch.
    #[test]
    fn test_text_mismatch() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [Old Name](#heading-1)

<!-- tocstop -->

## Heading 1

Content.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Text mismatch"));
    }
831
    // With `min_level = 2`, a level-1 heading is left out of the expected TOC.
    #[test]
    fn test_min_level_excludes_h1() {
        let mut rule = MD073TocValidation::new();
        rule.min_level = 2;

        let content = r#"<!-- toc -->

<!-- tocstop -->

# Should Be Excluded

## Should Be Included

Content.
"#;
        let ctx = create_ctx(content);
        let region = rule.detect_toc_region(&ctx).unwrap();
        let expected = rule.build_expected_toc(&ctx, &region);

        assert_eq!(expected.len(), 1);
        assert_eq!(expected[0].text, "Should Be Included");
    }
856
    // With `max_level = 4`, level-5 and level-6 headings are left out of the expected TOC.
    #[test]
    fn test_max_level_excludes_h5_h6() {
        let mut rule = MD073TocValidation::new();
        rule.max_level = 4;

        let content = r#"<!-- toc -->

<!-- tocstop -->

## Level 2

### Level 3

#### Level 4

##### Level 5 Should Be Excluded

###### Level 6 Should Be Excluded
"#;
        let ctx = create_ctx(content);
        let region = rule.detect_toc_region(&ctx).unwrap();
        let expected = rule.build_expected_toc(&ctx, &region);

        assert_eq!(expected.len(), 3);
        assert!(expected.iter().all(|e| e.level <= 4));
    }
883
    // `fix` regenerates the TOC, so a heading missing from it gets an entry.
    #[test]
    fn test_fix_adds_missing_entry() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)

<!-- tocstop -->

## Heading 1

Content.

## Heading 2

New heading.
"#;
        let ctx = create_ctx(content);
        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("- [Heading 2](#heading-2)"));
    }
909
    // `fix` regenerates the TOC, so an entry without a matching heading disappears.
    #[test]
    fn test_fix_removes_stale_entry() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)
- [Deleted](#deleted)

<!-- tocstop -->

## Heading 1

Content.
"#;
        let ctx = create_ctx(content);
        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("- [Heading 1](#heading-1)"));
        assert!(!fixed.contains("Deleted"));
    }
931
    // Running `fix` on its own output changes nothing.
    #[test]
    fn test_fix_idempotent() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

- [Heading 1](#heading-1)
- [Heading 2](#heading-2)

<!-- tocstop -->

## Heading 1

Content.

## Heading 2

More.
"#;
        let ctx = create_ctx(content);
        let fixed1 = rule.fix(&ctx).unwrap();
        let ctx2 = create_ctx(&fixed1);
        let fixed2 = rule.fix(&ctx2).unwrap();

        assert_eq!(fixed1, fixed2);
    }
960
    // `fix` replaces only the body between the markers; both marker comments survive.
    #[test]
    fn test_fix_preserves_markers() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

Old TOC content.

<!-- tocstop -->

## New Heading

Content.
"#;
        let ctx = create_ctx(content);
        let fixed = rule.fix(&ctx).unwrap();

        assert!(fixed.contains("<!-- toc -->"));
        assert!(fixed.contains("<!-- tocstop -->"));
        assert!(fixed.contains("- [New Heading](#new-heading)"));
    }
985
    // Without markers `fix` returns the input unchanged; with markers it rewrites the TOC.
    #[test]
    fn test_fix_requires_markers() {
        let rule = create_enabled_rule();

        // No markers: nothing to replace.
        let content_no_markers = r#"# Title

## Heading 1

Content.
"#;
        let ctx = create_ctx(content_no_markers);
        let fixed = rule.fix(&ctx).unwrap();
        assert_eq!(fixed, content_no_markers);

        // Markers present: the stale entry is replaced by the real heading.
        let content_markers = r#"# Title

<!-- toc -->

- [Old Entry](#old-entry)

<!-- tocstop -->

## Heading 1

Content.
"#;
        let ctx = create_ctx(content_markers);
        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("- [Heading 1](#heading-1)"));
        assert!(!fixed.contains("Old Entry"));
    }
1019
    // Repeated heading text yields `duplicate`, `duplicate-1`, `duplicate-2`.
    #[test]
    fn test_duplicate_heading_anchors() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Duplicate

Content.

## Duplicate

More content.

## Duplicate

Even more.
"#;
        let ctx = create_ctx(content);
        let region = rule.detect_toc_region(&ctx).unwrap();
        let expected = rule.build_expected_toc(&ctx, &region);

        assert_eq!(expected.len(), 3);
        assert_eq!(expected[0].anchor, "duplicate");
        assert_eq!(expected[1].anchor, "duplicate-1");
        assert_eq!(expected[2].anchor, "duplicate-2");
    }
1052
    // A heading-like line inside a fenced code block is not expected in the TOC.
    #[test]
    fn test_headings_in_code_blocks_ignored() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [Real Heading](#real-heading)

<!-- tocstop -->

## Real Heading

```markdown
## Fake Heading In Code
```

Content.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should not report fake heading in code block");
    }
1078
    // Markers on adjacent lines form an empty region; the heading is then reported missing.
    #[test]
    fn test_empty_toc_region() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->
<!-- tocstop -->

## Heading 1

Content.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].message.contains("Missing entry"));
    }
1096
    // The generated TOC contains an entry for every heading, nested ones as indented bullets.
    #[test]
    fn test_nested_indentation() {
        let rule = create_enabled_rule();

        let content = r#"<!-- toc -->

<!-- tocstop -->

## Level 2

### Level 3

#### Level 4

## Another Level 2
"#;
        let ctx = create_ctx(content);
        let region = rule.detect_toc_region(&ctx).unwrap();
        let expected = rule.build_expected_toc(&ctx, &region);
        let toc = rule.generate_toc(&expected);

        // NOTE(review): these substring checks do not pin the exact indent width.
        assert!(toc.contains("- [Level 2](#level-2)"));
        assert!(toc.contains(" - [Level 3](#level-3)"));
        assert!(toc.contains(" - [Level 4](#level-4)"));
        assert!(toc.contains("- [Another Level 2](#another-level-2)"));
    }
1124
    // A level-3 heading listed at the same indentation as level-2 headings is flagged.
    #[test]
    fn test_indentation_mismatch_detected() {
        let rule = create_enabled_rule();
        let content = r#"<!-- toc -->
- [Hello](#hello)
- [Another](#another)
- [Heading](#heading)
<!-- tocstop -->

## Hello

### Another

## Heading
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Should report indentation mismatch: {result:?}");
        assert!(
            result[0].message.contains("Indentation mismatch"),
            "Message should mention indentation: {}",
            result[0].message
        );
        assert!(
            result[0].message.contains("Another"),
            "Message should mention the entry: {}",
            result[0].message
        );
    }
1158
    // After `fix`, the level-3 entry is indented while the level-2 entries are still present.
    #[test]
    fn test_indentation_mismatch_fixed() {
        let rule = create_enabled_rule();
        let content = r#"<!-- toc -->
- [Hello](#hello)
- [Another](#another)
- [Heading](#heading)
<!-- tocstop -->

## Hello

### Another

## Heading
"#;
        let ctx = create_ctx(content);
        let fixed = rule.fix(&ctx).unwrap();
        assert!(fixed.contains("- [Hello](#hello)"));
        // The nested entry must be preceded by whitespace.
        assert!(fixed.contains(" - [Another](#another)"));
        assert!(fixed.contains("- [Heading](#heading)"));
    }
1182
1183 #[test]
1184 fn test_no_indentation_mismatch_when_correct() {
1185 let rule = create_enabled_rule();
1186 let content = r#"<!-- toc -->
1188- [Hello](#hello)
1189 - [Another](#another)
1190- [Heading](#heading)
1191<!-- tocstop -->
1192
1193## Hello
1194
1195### Another
1196
1197## Heading
1198"#;
1199 let ctx = create_ctx(content);
1200 let result = rule.check(&ctx).unwrap();
1201 assert!(result.is_empty(), "Should not report issues: {result:?}");
1203 }
1204
1205 #[test]
1208 fn test_order_mismatch_detected() {
1209 let rule = create_enabled_rule();
1210 let content = r#"# Title
1211
1212<!-- toc -->
1213
1214- [Section B](#section-b)
1215- [Section A](#section-a)
1216
1217<!-- tocstop -->
1218
1219## Section A
1220
1221Content A.
1222
1223## Section B
1224
1225Content B.
1226"#;
1227 let ctx = create_ctx(content);
1228 let result = rule.check(&ctx).unwrap();
1229 assert!(!result.is_empty(), "Should detect order mismatch");
1232 }
1233
    // With `enforce_order` off, a reordered but otherwise correct TOC is accepted.
    #[test]
    fn test_order_mismatch_ignored_when_disabled() {
        let mut rule = create_enabled_rule();
        rule.enforce_order = false;
        let content = r#"# Title

<!-- toc -->

- [Section B](#section-b)
- [Section A](#section-a)

<!-- tocstop -->

## Section A

Content A.

## Section B

Content B.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should not report order mismatch when disabled");
    }
1260
    // Non-ASCII heading text and its anchors are matched without warnings.
    #[test]
    fn test_unicode_headings() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [日本語の見出し](#日本語の見出し)
- [Émojis 🎉](#émojis-)

<!-- tocstop -->

## 日本語の見出し

Japanese content.

## Émojis 🎉

Content with emojis.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should handle unicode headings");
    }
1288
    // Punctuation in headings (`'`, `?`, `+`) is absent from the anchors the TOC uses.
    #[test]
    fn test_special_characters_in_headings() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [What's New?](#whats-new)
- [C++ Guide](#c-guide)

<!-- tocstop -->

## What's New?

News content.

## C++ Guide

C++ content.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should handle special characters");
    }
1313
    // Link text containing a code span with nested square brackets is parsed as one entry.
    #[test]
    fn test_code_spans_in_headings() {
        let rule = create_enabled_rule();
        let content = r#"# Title

<!-- toc -->

- [`check [PATHS...]`](#check-paths)

<!-- tocstop -->

## `check [PATHS...]`

Command documentation.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(result.is_empty(), "Should handle code spans in headings with brackets");
    }
1333
    // A default `Config` yields the rule's default levels and order enforcement.
    #[test]
    fn test_from_config_defaults() {
        let config = crate::config::Config::default();
        let rule = MD073TocValidation::from_config(&config);
        // `from_config` returns a boxed trait object; downcast to inspect the fields.
        let rule = rule.as_any().downcast_ref::<MD073TocValidation>().unwrap();

        assert_eq!(rule.min_level, 2);
        assert_eq!(rule.max_level, 4);
        assert!(rule.enforce_order);
    }
1346
1347 #[test]
1350 fn test_strip_markdown_formatting_link() {
1351 let result = strip_markdown_formatting("Tool: [terminal](https://example.com)");
1352 assert_eq!(result, "Tool: terminal");
1353 }
1354
1355 #[test]
1356 fn test_strip_markdown_formatting_bold() {
1357 let result = strip_markdown_formatting("This is **bold** text");
1358 assert_eq!(result, "This is bold text");
1359
1360 let result = strip_markdown_formatting("This is __bold__ text");
1361 assert_eq!(result, "This is bold text");
1362 }
1363
1364 #[test]
1365 fn test_strip_markdown_formatting_italic() {
1366 let result = strip_markdown_formatting("This is *italic* text");
1367 assert_eq!(result, "This is italic text");
1368
1369 let result = strip_markdown_formatting("This is _italic_ text");
1370 assert_eq!(result, "This is italic text");
1371 }
1372
1373 #[test]
1374 fn test_strip_markdown_formatting_code_span() {
1375 let result = strip_markdown_formatting("Use the `format` function");
1376 assert_eq!(result, "Use the format function");
1377 }
1378
1379 #[test]
1380 fn test_strip_markdown_formatting_image() {
1381 let result = strip_markdown_formatting("See  for details");
1382 assert_eq!(result, "See logo for details");
1383 }
1384
1385 #[test]
1386 fn test_strip_markdown_formatting_reference_link() {
1387 let result = strip_markdown_formatting("See [documentation][docs] for details");
1388 assert_eq!(result, "See documentation for details");
1389 }
1390
1391 #[test]
1392 fn test_strip_markdown_formatting_combined() {
1393 let result = strip_markdown_formatting("Tool: [**terminal**](https://example.com)");
1395 assert_eq!(result, "Tool: terminal");
1396 }
1397
    // A heading containing a link matches a TOC entry that shows only the stripped text.
    #[test]
    fn test_toc_with_link_in_heading_matches_stripped_text() {
        let rule = create_enabled_rule();

        let content = r#"# Title

<!-- toc -->

- [Tool: terminal](#tool-terminal)

<!-- tocstop -->

## Tool: [terminal](https://example.com)

Content here.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert!(
            result.is_empty(),
            "Stripped heading text should match TOC entry: {result:?}"
        );
    }
1422
    // Dropping part of the heading text (not just markup) is still a text mismatch.
    #[test]
    fn test_toc_with_simplified_text_still_mismatches() {
        let rule = create_enabled_rule();

        let content = r#"# Title

<!-- toc -->

- [terminal](#tool-terminal)

<!-- tocstop -->

## Tool: [terminal](https://example.com)

Content here.
"#;
        let ctx = create_ctx(content);
        let result = rule.check(&ctx).unwrap();
        assert_eq!(result.len(), 1, "Should report text mismatch");
        assert!(result[0].message.contains("Text mismatch"));
    }
1445
    // Generated entries show the heading's link text only; URLs never reach the TOC body.
    #[test]
    fn test_fix_generates_stripped_toc_entries() {
        let rule = MD073TocValidation::new();
        let content = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Tool: [busybox](https://www.busybox.net/)

Content.

## Tool: [mount](https://en.wikipedia.org/wiki/Mount)

More content.
"#;
        let ctx = create_ctx(content);
        let fixed = rule.fix(&ctx).unwrap();

        assert!(
            fixed.contains("- [Tool: busybox](#tool-busybox)"),
            "TOC entry should have stripped link text"
        );
        assert!(
            fixed.contains("- [Tool: mount](#tool-mount)"),
            "TOC entry should have stripped link text"
        );
        // Look only at the text between the markers; the headings below still hold the URLs.
        let toc_start = fixed.find("<!-- toc -->").unwrap();
        let toc_end = fixed.find("<!-- tocstop -->").unwrap();
        let toc_content = &fixed[toc_start..toc_end];
        assert!(
            !toc_content.contains("busybox.net"),
            "TOC should not contain URLs: {toc_content}"
        );
        assert!(
            !toc_content.contains("wikipedia.org"),
            "TOC should not contain URLs: {toc_content}"
        );
    }
1489
/// Bold markers in a heading are dropped from the generated TOC entry text.
#[test]
fn test_fix_with_bold_in_heading() {
    let markdown = r#"# Title

<!-- toc -->

<!-- tocstop -->

## **Important** Section

Content.
"#;
    let ctx = create_ctx(markdown);
    let fixed = MD073TocValidation::new().fix(&ctx).unwrap();

    assert!(fixed.contains("- [Important Section](#important-section)"));
}
1509
/// Backticks in a heading are dropped from the generated TOC entry text.
#[test]
fn test_fix_with_code_in_heading() {
    let markdown = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Using `async` Functions

Content.
"#;
    let ctx = create_ctx(markdown);
    let fixed = MD073TocValidation::new().fix(&ctx).unwrap();

    assert!(fixed.contains("- [Using async Functions](#using-async-functions)"));
}
1529
/// A TOC entry pointing at a heading's explicit `{#id}` anchor is accepted.
#[test]
fn test_custom_anchor_id_respected() {
    let markdown = r#"# Title

<!-- toc -->

- [My Section](#my-custom-anchor)

<!-- tocstop -->

## My Section {#my-custom-anchor}

Content here.
"#;
    let rule = create_enabled_rule();
    let ctx = create_ctx(markdown);

    let result = rule.check(&ctx).unwrap();
    assert!(result.is_empty(), "Should respect custom anchor IDs: {result:?}");
}
1551
/// Generated TOC entries link to the explicit `{#id}` anchors and omit the
/// `{#id}` suffix from the entry text.
#[test]
fn test_custom_anchor_id_in_generated_toc() {
    let markdown = r#"# Title

<!-- toc -->

<!-- tocstop -->

## First Section {#custom-first}

Content.

## Second Section {#another-custom}

More content.
"#;
    let rule = create_enabled_rule();
    let ctx = create_ctx(markdown);

    let regenerated = rule.fix(&ctx).unwrap();
    assert!(regenerated.contains("- [First Section](#custom-first)"));
    assert!(regenerated.contains("- [Second Section](#another-custom)"));
}
1574
/// A TOC mixing an explicit `{#id}` anchor with an auto-generated one is
/// accepted without warnings.
#[test]
fn test_mixed_custom_and_generated_anchors() {
    let markdown = r#"# Title

<!-- toc -->

- [Custom Section](#my-id)
- [Normal Section](#normal-section)

<!-- tocstop -->

## Custom Section {#my-id}

Content.

## Normal Section

More content.
"#;
    let rule = create_enabled_rule();
    let ctx = create_ctx(markdown);

    let warnings = rule.check(&ctx).unwrap();
    assert!(warnings.is_empty(), "Should handle mixed custom and generated anchors");
}
1599
/// The anchor expected for `Test_With_Underscores` is lowercased and keeps
/// its underscores.
#[test]
fn test_github_anchor_style() {
    let rule = create_enabled_rule();

    let content = r#"<!-- toc -->

<!-- tocstop -->

## Test_With_Underscores

Content.
"#;
    let ctx = create_ctx(content);
    let region = rule.detect_toc_region(&ctx).unwrap();
    // The region is passed by reference (`&region`).
    let expected = rule.build_expected_toc(&ctx, &region);

    // Guard the index below so an empty result fails with a clear message.
    assert!(
        !expected.is_empty(),
        "expected TOC should contain the heading after the TOC markers"
    );
    assert_eq!(expected[0].anchor, "test_with_underscores");
}
1621
/// With 150 headings and an empty TOC, `check` reports a single warning and
/// `fix` generates entries for the first, a middle and the last heading.
#[test]
fn test_stress_many_headings() {
    let rule = create_enabled_rule();

    // Empty TOC followed by 150 numbered level-2 sections.
    let mut doc = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
    doc.extend((1..=150).map(|i| format!("## Heading Number {i}\n\nContent for section {i}.\n\n")));

    let ctx = create_ctx(&doc);

    // All missing entries are reported as one warning, not one per heading.
    let warnings = rule.check(&ctx).unwrap();
    assert_eq!(warnings.len(), 1, "Should report single warning for TOC");
    assert!(warnings[0].message.contains("Missing entry"));

    let regenerated = rule.fix(&ctx).unwrap();
    assert!(regenerated.contains("- [Heading Number 1](#heading-number-1)"));
    assert!(regenerated.contains("- [Heading Number 100](#heading-number-100)"));
    assert!(regenerated.contains("- [Heading Number 150](#heading-number-150)"));
}
1650
/// Four groups of level-2/3/4 headings: the regenerated TOC contains entries
/// for every level of the first and the last group.
#[test]
fn test_stress_deeply_nested() {
    let markdown = r#"# Title

<!-- toc -->

<!-- tocstop -->

## Level 2 A

### Level 3 A

#### Level 4 A

## Level 2 B

### Level 3 B

#### Level 4 B

## Level 2 C

### Level 3 C

#### Level 4 C

## Level 2 D

### Level 3 D

#### Level 4 D
"#;
    let rule = create_enabled_rule();
    let ctx = create_ctx(markdown);
    let regenerated = rule.fix(&ctx).unwrap();

    // NOTE(review): these are substring checks, so the nested-entry patterns
    // only require at least one space before the bullet; they do not pin the
    // exact indentation width.
    assert!(regenerated.contains("- [Level 2 A](#level-2-a)"));
    assert!(regenerated.contains(" - [Level 3 A](#level-3-a)"));
    assert!(regenerated.contains(" - [Level 4 A](#level-4-a)"));
    assert!(regenerated.contains("- [Level 2 D](#level-2-d)"));
    assert!(regenerated.contains(" - [Level 3 D](#level-3-d)"));
    assert!(regenerated.contains(" - [Level 4 D](#level-4-d)"));
}
1695
/// Fifty identical `## FAQ` headings yield fifty expected TOC entries whose
/// anchors are `faq`, `faq-1`, … `faq-49`.
#[test]
fn test_stress_many_duplicates() {
    let rule = create_enabled_rule();

    // Empty TOC followed by 50 headings with the same text.
    let mut content = String::from("# Title\n\n<!-- toc -->\n\n<!-- tocstop -->\n\n");
    content.push_str(&"## FAQ\n\nContent.\n\n".repeat(50));

    let ctx = create_ctx(&content);
    let region = rule.detect_toc_region(&ctx).unwrap();
    // The region is passed by reference (`&region`).
    let expected = rule.build_expected_toc(&ctx, &region);

    // The length check comes first so the indexing below cannot go out of bounds.
    assert_eq!(expected.len(), 50);
    assert_eq!(expected[0].anchor, "faq");
    assert_eq!(expected[1].anchor, "faq-1");
    assert_eq!(expected[49].anchor, "faq-49");
}
1716}