Skip to main content

rumdl_lib/rules/
md066_footnote_validation.rs

1use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use regex::Regex;
3use std::collections::{HashMap, HashSet};
4use std::sync::LazyLock;
5
6/// Pattern to match footnote definitions: [^id]: content
7/// Matches at start of line, with 0-3 leading spaces, caret in brackets
8/// Also handles definitions inside blockquotes (after stripping > prefixes)
9pub static FOOTNOTE_DEF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[ ]{0,3}\[\^([^\]]+)\]:").unwrap());
10
11/// Pattern to match footnote references in text: [^id]
12/// Callers must manually check that the match is NOT followed by `:` (which would make it a definition)
13pub static FOOTNOTE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^([^\]]+)\]").unwrap());
14
/// Returns `line` with any leading blockquote markers removed.
///
/// The scan walks over leading spaces and `>` characters. Each `>` may be
/// followed by one optional space that belongs to the marker. The returned
/// slice begins right after the last marker (and its optional space), so
/// nested quotes such as `> > > text` or `>>text` are handled as well.
///
/// If the line contains no `>` in its leading run, it is returned unchanged,
/// including any leading spaces.
pub fn strip_blockquote_prefix(line: &str) -> &str {
    // Only ASCII `>` and ` ` are ever consumed, so byte offsets are always
    // valid char boundaries for the final slice.
    let bytes = line.as_bytes();
    let mut cursor = 0;
    let mut content_start = 0;

    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'>' => {
                cursor += 1;
                // A single space directly after `>` is part of the marker.
                if bytes.get(cursor) == Some(&b' ') {
                    cursor += 1;
                }
                content_start = cursor;
            }
            // Spaces before (or between) markers are skipped but do not move
            // the content start on their own.
            b' ' => cursor += 1,
            _ => break,
        }
    }

    &line[content_start..]
}
45
46/// Find the (column, end_column) of a footnote definition marker `[^id]:` on a line.
47/// Returns 1-indexed column positions pointing to `[^id]:`, not leading whitespace.
48/// Handles blockquote prefixes and uses character counting for multi-byte support.
49pub fn footnote_def_position(line: &str) -> (usize, usize) {
50    let stripped = strip_blockquote_prefix(line);
51    if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(stripped) {
52        let prefix_chars = line.chars().count() - stripped.chars().count();
53        let id_match = caps.get(1).unwrap();
54        // `[^` is always 2 bytes before the ID capture group
55        let bracket_byte_pos = id_match.start() - 2;
56        let chars_before_bracket = stripped[..bracket_byte_pos].chars().count();
57        let full_match_end = caps.get(0).unwrap().end();
58        let marker_chars = stripped[bracket_byte_pos..full_match_end].chars().count();
59        (
60            prefix_chars + chars_before_bracket + 1,
61            prefix_chars + chars_before_bracket + marker_chars + 1,
62        )
63    } else {
64        (1, 1)
65    }
66}
67
/// Rule MD066: footnote validation.
///
/// Checks that footnote references and footnote definitions pair up:
/// - a reference such as `[^1]` with no matching definition is reported;
/// - a definition such as `[^1]: text` that is never referenced is reported.
///
/// Footnotes are a widespread markdown extension rather than part of CommonMark:
/// - Reference: `[^identifier]` inside text
/// - Definition: `[^identifier]: definition text` (may continue on indented lines)
///
/// ## Examples
///
/// **Valid:**
/// ```markdown
/// This has a footnote[^1] that is properly defined.
///
/// [^1]: This is the footnote content.
/// ```
///
/// **Invalid - reference without a definition:**
/// ```markdown
/// This references[^missing] a footnote that doesn't exist.
/// ```
///
/// **Invalid - definition without a reference:**
/// ```markdown
/// [^unused]: This footnote is defined but never referenced.
/// ```
#[derive(Clone, Debug, Default)]
pub struct MD066FootnoteValidation;
98
99impl MD066FootnoteValidation {
100    pub fn new() -> Self {
101        Self
102    }
103}
104
105impl Rule for MD066FootnoteValidation {
106    fn name(&self) -> &'static str {
107        "MD066"
108    }
109
110    fn description(&self) -> &'static str {
111        "Footnote validation"
112    }
113
114    fn category(&self) -> RuleCategory {
115        RuleCategory::Other
116    }
117
118    fn fix_capability(&self) -> FixCapability {
119        FixCapability::Unfixable
120    }
121
122    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
123        ctx.content.is_empty() || !ctx.content.contains("[^")
124    }
125
126    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
127        let mut warnings = Vec::new();
128
129        // Early exit if no footnotes at all
130        if ctx.footnote_refs.is_empty() && !ctx.content.contains("[^") {
131            return Ok(warnings);
132        }
133
134        // Collect all footnote references (id is WITHOUT the ^ prefix)
135        // Map from id -> list of (line, byte_offset) for each reference
136        // Note: pulldown-cmark only finds references when definitions exist,
137        // so we need to parse references directly to find orphaned ones
138        let mut references: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
139
140        // First, use pulldown-cmark's detected references (when definitions exist)
141        for footnote_ref in &ctx.footnote_refs {
142            // Skip if in code block, frontmatter, HTML comment, or HTML block
143            if ctx.line_info(footnote_ref.line).is_some_and(|info| {
144                info.in_code_block || info.in_front_matter || info.in_html_comment || info.in_html_block
145            }) {
146                continue;
147            }
148            references
149                .entry(footnote_ref.id.to_lowercase())
150                .or_default()
151                .push((footnote_ref.line, footnote_ref.byte_offset));
152        }
153
154        // Also parse references directly to find orphaned ones (without definitions)
155        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
156            // Skip if in code block, frontmatter, HTML comment, or HTML block
157            if line_info.in_code_block
158                || line_info.in_front_matter
159                || line_info.in_html_comment
160                || line_info.in_html_block
161            {
162                continue;
163            }
164
165            let line = line_info.content(ctx.content);
166            let line_num = line_idx + 1; // 1-indexed
167
168            for caps in FOOTNOTE_REF_PATTERN.captures_iter(line) {
169                if let Some(id_match) = caps.get(1) {
170                    // Skip if this is a footnote definition (at line start with 0-3 spaces indent)
171                    // Also handle blockquote prefixes (e.g., "> [^id]:")
172                    let full_match = caps.get(0).unwrap();
173                    if line.as_bytes().get(full_match.end()) == Some(&b':') {
174                        let before_match = &line[..full_match.start()];
175                        if before_match.chars().all(|c| c == ' ' || c == '>') {
176                            continue;
177                        }
178                    }
179
180                    let id = id_match.as_str().to_lowercase();
181
182                    // Check if this match is inside a code span
183                    let match_start = full_match.start();
184                    let byte_offset = line_info.byte_offset + match_start;
185
186                    let in_code_span = ctx.is_in_code_span_byte(byte_offset);
187
188                    if !in_code_span {
189                        // Only add if not already found (avoid duplicates with pulldown-cmark)
190                        references.entry(id).or_default().push((line_num, byte_offset));
191                    }
192                }
193            }
194        }
195
196        // Deduplicate references (pulldown-cmark and regex might find the same ones)
197        for occurrences in references.values_mut() {
198            occurrences.sort();
199            occurrences.dedup();
200        }
201
202        // Collect footnote definitions by parsing directly from content
203        // Footnote definitions: [^id]: content (NOT in reference_defs which expects URLs)
204        // Map from id (lowercase) -> list of (line, byte_offset) for duplicate detection
205        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
206        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
207            // Skip if in code block, frontmatter, HTML comment, or HTML block
208            if line_info.in_code_block
209                || line_info.in_front_matter
210                || line_info.in_html_comment
211                || line_info.in_html_block
212            {
213                continue;
214            }
215
216            let line = line_info.content(ctx.content);
217            // Strip blockquote prefixes to handle definitions inside blockquotes
218            let line_stripped = strip_blockquote_prefix(line);
219
220            if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(line_stripped)
221                && let Some(id_match) = caps.get(1)
222            {
223                let id = id_match.as_str().to_lowercase();
224                let line_num = line_idx + 1; // 1-indexed
225                definitions
226                    .entry(id)
227                    .or_default()
228                    .push((line_num, line_info.byte_offset));
229            }
230        }
231
232        // Check for duplicate definitions
233        for (def_id, occurrences) in &definitions {
234            if occurrences.len() > 1 {
235                // Report all duplicate definitions after the first one
236                for (line, _byte_offset) in &occurrences[1..] {
237                    let (col, end_col) = ctx
238                        .lines
239                        .get(*line - 1)
240                        .map(|li| footnote_def_position(li.content(ctx.content)))
241                        .unwrap_or((1, 1));
242                    warnings.push(LintWarning {
243                        rule_name: Some(self.name().to_string()),
244                        line: *line,
245                        column: col,
246                        end_line: *line,
247                        end_column: end_col,
248                        message: format!(
249                            "Duplicate footnote definition '[^{def_id}]' (first defined on line {})",
250                            occurrences[0].0
251                        ),
252                        severity: Severity::Error,
253                        fix: None,
254                    });
255                }
256            }
257        }
258
259        // Check for orphaned references (references without definitions)
260        let defined_ids: HashSet<&String> = definitions.keys().collect();
261        for (ref_id, occurrences) in &references {
262            if !defined_ids.contains(ref_id) {
263                // Report the first occurrence of each undefined reference
264                let (line, byte_offset) = occurrences[0];
265                // Compute character-based column from byte offset within the line.
266                // Find the actual marker text in the source to get the real length,
267                // since ref_id is lowercased and may differ from the original.
268                let (col, end_col) = if let Some(line_info) = ctx.lines.get(line - 1) {
269                    let line_content = line_info.content(ctx.content);
270                    let byte_pos = byte_offset.saturating_sub(line_info.byte_offset);
271                    let char_col = line_content.get(..byte_pos).map(|s| s.chars().count()).unwrap_or(0);
272                    // Find the actual [^...] marker in the source at this position
273                    let marker_chars = line_content
274                        .get(byte_pos..)
275                        .and_then(|rest| rest.find(']'))
276                        .map(|end| line_content[byte_pos..byte_pos + end + 1].chars().count())
277                        .unwrap_or_else(|| format!("[^{ref_id}]").chars().count());
278                    (char_col + 1, char_col + marker_chars + 1)
279                } else {
280                    (1, 1)
281                };
282                warnings.push(LintWarning {
283                    rule_name: Some(self.name().to_string()),
284                    line,
285                    column: col,
286                    end_line: line,
287                    end_column: end_col,
288                    message: format!("Footnote reference '[^{ref_id}]' has no corresponding definition"),
289                    severity: Severity::Error,
290                    fix: None,
291                });
292            }
293        }
294
295        // Check for orphaned definitions (definitions without references)
296        let referenced_ids: HashSet<&String> = references.keys().collect();
297        for (def_id, occurrences) in &definitions {
298            if !referenced_ids.contains(def_id) {
299                // Report the first definition location
300                let (line, _byte_offset) = occurrences[0];
301                let (col, end_col) = ctx
302                    .lines
303                    .get(line - 1)
304                    .map(|li| footnote_def_position(li.content(ctx.content)))
305                    .unwrap_or((1, 1));
306                warnings.push(LintWarning {
307                    rule_name: Some(self.name().to_string()),
308                    line,
309                    column: col,
310                    end_line: line,
311                    end_column: end_col,
312                    message: format!("Footnote definition '[^{def_id}]' is never referenced"),
313                    severity: Severity::Error,
314                    fix: None,
315                });
316            }
317        }
318
319        // Sort warnings by line number for consistent output
320        warnings.sort_by_key(|w| w.line);
321
322        Ok(warnings)
323    }
324
325    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
326        // No automatic fix - user must decide what to do with orphaned footnotes
327        Ok(ctx.content.to_string())
328    }
329
330    fn as_any(&self) -> &dyn std::any::Any {
331        self
332    }
333
334    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
335    where
336        Self: Sized,
337    {
338        Box::new(MD066FootnoteValidation)
339    }
340}
341
342#[cfg(test)]
343mod tests {
344    use super::*;
345    use crate::lint_context::LintContext;
346
347    fn check_md066(content: &str) -> Vec<LintWarning> {
348        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
349        MD066FootnoteValidation::new().check(&ctx).unwrap()
350    }
351
352    // ==================== Valid cases ====================
353
354    #[test]
355    fn test_valid_single_footnote() {
356        let content = "This has a footnote[^1].\n\n[^1]: The footnote content.";
357        let warnings = check_md066(content);
358        assert!(warnings.is_empty(), "Valid footnote should not warn: {warnings:?}");
359    }
360
361    #[test]
362    fn test_valid_multiple_footnotes() {
363        let content = r#"First footnote[^1] and second[^2].
364
365[^1]: First definition.
366[^2]: Second definition."#;
367        let warnings = check_md066(content);
368        assert!(warnings.is_empty(), "Valid footnotes should not warn: {warnings:?}");
369    }
370
371    #[test]
372    fn test_valid_named_footnotes() {
373        let content = r#"See the note[^note] and warning[^warning].
374
375[^note]: This is a note.
376[^warning]: This is a warning."#;
377        let warnings = check_md066(content);
378        assert!(warnings.is_empty(), "Named footnotes should not warn: {warnings:?}");
379    }
380
381    #[test]
382    fn test_valid_footnote_used_multiple_times() {
383        let content = r#"First[^1] and again[^1] and third[^1].
384
385[^1]: Used multiple times."#;
386        let warnings = check_md066(content);
387        assert!(warnings.is_empty(), "Reused footnote should not warn: {warnings:?}");
388    }
389
390    #[test]
391    fn test_valid_case_insensitive_matching() {
392        let content = r#"Reference[^NOTE].
393
394[^note]: Definition with different case."#;
395        let warnings = check_md066(content);
396        assert!(
397            warnings.is_empty(),
398            "Case-insensitive matching should work: {warnings:?}"
399        );
400    }
401
402    #[test]
403    fn test_no_footnotes_at_all() {
404        let content = "Just regular markdown without any footnotes.";
405        let warnings = check_md066(content);
406        assert!(warnings.is_empty(), "No footnotes should not warn");
407    }
408
409    // ==================== Orphaned references ====================
410
411    #[test]
412    fn test_orphaned_reference_single() {
413        let content = "This references[^missing] a non-existent footnote.";
414        let warnings = check_md066(content);
415        assert_eq!(warnings.len(), 1, "Should detect orphaned reference");
416        assert!(warnings[0].message.contains("missing"));
417        assert!(warnings[0].message.contains("no corresponding definition"));
418    }
419
420    #[test]
421    fn test_orphaned_reference_multiple() {
422        let content = r#"First[^a], second[^b], third[^c].
423
424[^b]: Only b is defined."#;
425        let warnings = check_md066(content);
426        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned references: {warnings:?}");
427        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
428        assert!(messages.iter().any(|m| m.contains("[^a]")));
429        assert!(messages.iter().any(|m| m.contains("[^c]")));
430    }
431
432    #[test]
433    fn test_orphaned_reference_reports_first_occurrence() {
434        let content = "First[^missing] and again[^missing] and third[^missing].";
435        let warnings = check_md066(content);
436        // Should only report once per unique ID
437        assert_eq!(warnings.len(), 1, "Should report each orphaned ID once");
438        assert!(warnings[0].message.contains("missing"));
439    }
440
441    // ==================== Orphaned definitions ====================
442
443    #[test]
444    fn test_orphaned_definition_single() {
445        let content = "Regular text.\n\n[^unused]: This is never referenced.";
446        let warnings = check_md066(content);
447        assert_eq!(warnings.len(), 1, "Should detect orphaned definition");
448        assert!(warnings[0].message.contains("unused"));
449        assert!(warnings[0].message.contains("never referenced"));
450    }
451
452    #[test]
453    fn test_orphaned_definition_multiple() {
454        let content = r#"Using one[^used].
455
456[^used]: This is used.
457[^orphan1]: Never used.
458[^orphan2]: Also never used."#;
459        let warnings = check_md066(content);
460        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned definitions: {warnings:?}");
461        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
462        assert!(messages.iter().any(|m| m.contains("orphan1")));
463        assert!(messages.iter().any(|m| m.contains("orphan2")));
464    }
465
466    // ==================== Mixed cases ====================
467
468    #[test]
469    fn test_both_orphaned_reference_and_definition() {
470        let content = r#"Reference[^missing].
471
472[^unused]: Never referenced."#;
473        let warnings = check_md066(content);
474        assert_eq!(
475            warnings.len(),
476            2,
477            "Should detect both orphaned ref and def: {warnings:?}"
478        );
479        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
480        assert!(
481            messages.iter().any(|m| m.contains("missing")),
482            "Should find missing ref"
483        );
484        assert!(messages.iter().any(|m| m.contains("unused")), "Should find unused def");
485    }
486
487    // ==================== Code block handling ====================
488
489    #[test]
490    fn test_footnote_in_code_block_ignored() {
491        let content = r#"```
492[^1]: This is in a code block
493```
494
495Regular text without footnotes."#;
496        let warnings = check_md066(content);
497        assert!(warnings.is_empty(), "Footnotes in code blocks should be ignored");
498    }
499
500    #[test]
501    fn test_footnote_reference_in_code_span_ignored() {
502        // Note: This depends on whether pulldown-cmark parses footnotes inside code spans
503        // If it does, we should skip them
504        let content = r#"Use `[^1]` syntax for footnotes.
505
506[^1]: This definition exists but the reference in backticks shouldn't count."#;
507        // This is tricky - if pulldown-cmark doesn't parse [^1] in backticks as a footnote ref,
508        // then the definition is orphaned
509        let warnings = check_md066(content);
510        // Expectation depends on parser behavior - test the actual behavior
511        assert_eq!(
512            warnings.len(),
513            1,
514            "Code span reference shouldn't count, definition is orphaned"
515        );
516        assert!(warnings[0].message.contains("never referenced"));
517    }
518
519    // ==================== Frontmatter handling ====================
520
521    #[test]
522    fn test_footnote_in_frontmatter_ignored() {
523        let content = r#"---
524note: "[^1]: yaml value"
525---
526
527Regular content."#;
528        let warnings = check_md066(content);
529        assert!(
530            warnings.is_empty(),
531            "Footnotes in frontmatter should be ignored: {warnings:?}"
532        );
533    }
534
535    // ==================== Edge cases ====================
536
537    #[test]
538    fn test_empty_document() {
539        let warnings = check_md066("");
540        assert!(warnings.is_empty());
541    }
542
543    #[test]
544    fn test_footnote_with_special_characters() {
545        let content = r#"Reference[^my-note_1].
546
547[^my-note_1]: Definition with special chars in ID."#;
548        let warnings = check_md066(content);
549        assert!(
550            warnings.is_empty(),
551            "Special characters in footnote ID should work: {warnings:?}"
552        );
553    }
554
555    #[test]
556    fn test_multiline_footnote_definition() {
557        let content = r#"Reference[^long].
558
559[^long]: This is a long footnote
560    that spans multiple lines
561    with proper indentation."#;
562        let warnings = check_md066(content);
563        assert!(
564            warnings.is_empty(),
565            "Multiline footnote definitions should work: {warnings:?}"
566        );
567    }
568
569    #[test]
570    fn test_footnote_at_end_of_sentence() {
571        let content = r#"This ends with a footnote[^1].
572
573[^1]: End of sentence footnote."#;
574        let warnings = check_md066(content);
575        assert!(warnings.is_empty());
576    }
577
578    #[test]
579    fn test_footnote_mid_sentence() {
580        let content = r#"Some text[^1] continues here.
581
582[^1]: Mid-sentence footnote."#;
583        let warnings = check_md066(content);
584        assert!(warnings.is_empty());
585    }
586
587    #[test]
588    fn test_adjacent_footnotes() {
589        let content = r#"Text[^1][^2] with adjacent footnotes.
590
591[^1]: First.
592[^2]: Second."#;
593        let warnings = check_md066(content);
594        assert!(warnings.is_empty(), "Adjacent footnotes should work: {warnings:?}");
595    }
596
597    #[test]
598    fn test_footnote_only_definitions_no_references() {
599        let content = r#"[^1]: First orphan.
600[^2]: Second orphan.
601[^3]: Third orphan."#;
602        let warnings = check_md066(content);
603        assert_eq!(warnings.len(), 3, "All definitions should be flagged: {warnings:?}");
604    }
605
606    #[test]
607    fn test_footnote_only_references_no_definitions() {
608        let content = "Text[^1] and[^2] and[^3].";
609        let warnings = check_md066(content);
610        assert_eq!(warnings.len(), 3, "All references should be flagged: {warnings:?}");
611    }
612
613    // ==================== Blockquote handling ====================
614
615    #[test]
616    fn test_footnote_in_blockquote_valid() {
617        let content = r#"> This has a footnote[^1].
618>
619> [^1]: Definition inside blockquote."#;
620        let warnings = check_md066(content);
621        assert!(
622            warnings.is_empty(),
623            "Footnotes inside blockquotes should be validated: {warnings:?}"
624        );
625    }
626
627    #[test]
628    fn test_footnote_in_nested_blockquote() {
629        let content = r#"> > Nested blockquote with footnote[^nested].
630> >
631> > [^nested]: Definition in nested blockquote."#;
632        let warnings = check_md066(content);
633        assert!(
634            warnings.is_empty(),
635            "Footnotes in nested blockquotes should work: {warnings:?}"
636        );
637    }
638
639    #[test]
640    fn test_footnote_blockquote_orphaned_reference() {
641        let content = r#"> This has an orphaned footnote[^missing].
642>
643> No definition here."#;
644        let warnings = check_md066(content);
645        assert_eq!(warnings.len(), 1, "Should detect orphaned ref in blockquote");
646        assert!(warnings[0].message.contains("missing"));
647    }
648
649    #[test]
650    fn test_footnote_blockquote_orphaned_definition() {
651        let content = r#"> Some text.
652>
653> [^unused]: Never referenced in blockquote."#;
654        let warnings = check_md066(content);
655        assert_eq!(warnings.len(), 1, "Should detect orphaned def in blockquote");
656        assert!(warnings[0].message.contains("unused"));
657    }
658
659    // ==================== Duplicate definitions ====================
660
661    #[test]
662    fn test_duplicate_definition_detected() {
663        let content = r#"Reference[^1].
664
665[^1]: First definition.
666[^1]: Second definition (duplicate)."#;
667        let warnings = check_md066(content);
668        assert_eq!(warnings.len(), 1, "Should detect duplicate definition: {warnings:?}");
669        assert!(warnings[0].message.contains("Duplicate"));
670        assert!(warnings[0].message.contains("[^1]"));
671    }
672
673    #[test]
674    fn test_multiple_duplicate_definitions() {
675        let content = r#"Reference[^dup].
676
677[^dup]: First.
678[^dup]: Second.
679[^dup]: Third."#;
680        let warnings = check_md066(content);
681        assert_eq!(warnings.len(), 2, "Should detect 2 duplicate definitions: {warnings:?}");
682        assert!(warnings.iter().all(|w| w.message.contains("Duplicate")));
683    }
684
685    #[test]
686    fn test_duplicate_definition_case_insensitive() {
687        let content = r#"Reference[^Note].
688
689[^note]: Lowercase definition.
690[^NOTE]: Uppercase definition (duplicate)."#;
691        let warnings = check_md066(content);
692        assert_eq!(warnings.len(), 1, "Case-insensitive duplicate detection: {warnings:?}");
693        assert!(warnings[0].message.contains("Duplicate"));
694    }
695
696    // ==================== HTML comment handling ====================
697
698    #[test]
699    fn test_footnote_reference_in_html_comment_ignored() {
700        let content = r#"<!-- This has [^1] in a comment -->
701
702Regular text without footnotes."#;
703        let warnings = check_md066(content);
704        assert!(
705            warnings.is_empty(),
706            "Footnote refs in HTML comments should be ignored: {warnings:?}"
707        );
708    }
709
710    #[test]
711    fn test_footnote_definition_in_html_comment_ignored() {
712        let content = r#"<!--
713[^1]: Definition in HTML comment
714-->
715
716Regular text."#;
717        let warnings = check_md066(content);
718        assert!(
719            warnings.is_empty(),
720            "Footnote defs in HTML comments should be ignored: {warnings:?}"
721        );
722    }
723
724    #[test]
725    fn test_footnote_outside_html_comment_still_validated() {
726        let content = r#"<!-- Just a comment -->
727
728Text with footnote[^1].
729
730[^1]: Valid definition outside comment."#;
731        let warnings = check_md066(content);
732        assert!(warnings.is_empty(), "Valid footnote outside comment: {warnings:?}");
733    }
734
735    #[test]
736    fn test_orphaned_ref_not_saved_by_def_in_comment() {
737        let content = r#"Text with orphaned[^missing].
738
739<!--
740[^missing]: This definition is in a comment, shouldn't count
741-->"#;
742        let warnings = check_md066(content);
743        assert_eq!(warnings.len(), 1, "Def in comment shouldn't satisfy ref: {warnings:?}");
744        assert!(warnings[0].message.contains("no corresponding definition"));
745    }
746
747    // ==================== HTML block handling ====================
748
749    #[test]
750    fn test_footnote_in_html_block_ignored() {
751        // Regex character classes like [^abc] should be ignored in HTML blocks
752        let content = r#"<table>
753<tr>
754<td><code>[^abc]</code></td>
755<td>Negated character class</td>
756</tr>
757</table>
758
759Regular markdown text."#;
760        let warnings = check_md066(content);
761        assert!(
762            warnings.is_empty(),
763            "Footnote-like patterns in HTML blocks should be ignored: {warnings:?}"
764        );
765    }
766
767    #[test]
768    fn test_footnote_in_html_table_ignored() {
769        let content = r#"| Header |
770|--------|
771| Cell   |
772
773<div>
774<p>This has <code>[^0-9]</code> regex pattern</p>
775</div>
776
777Normal text."#;
778        let warnings = check_md066(content);
779        assert!(
780            warnings.is_empty(),
781            "Regex patterns in HTML div should be ignored: {warnings:?}"
782        );
783    }
784
785    #[test]
786    fn test_real_footnote_outside_html_block() {
787        let content = r#"<div>
788Some HTML content
789</div>
790
791Text with real footnote[^1].
792
793[^1]: This is a real footnote definition."#;
794        let warnings = check_md066(content);
795        assert!(
796            warnings.is_empty(),
797            "Real footnote outside HTML block should work: {warnings:?}"
798        );
799    }
800
801    // ==================== Combined edge cases ====================
802
803    #[test]
804    fn test_blockquote_with_duplicate_definitions() {
805        let content = r#"> Text[^1].
806>
807> [^1]: First.
808> [^1]: Duplicate in blockquote."#;
809        let warnings = check_md066(content);
810        assert_eq!(warnings.len(), 1, "Should detect duplicate in blockquote: {warnings:?}");
811        assert!(warnings[0].message.contains("Duplicate"));
812    }
813
814    #[test]
815    fn test_all_enhancement_features_together() {
816        let content = r#"<!-- Comment with [^comment] -->
817
818Regular text[^valid] and[^missing].
819
820> Blockquote text[^bq].
821>
822> [^bq]: Blockquote definition.
823
824[^valid]: Valid definition.
825[^valid]: Duplicate definition.
826[^unused]: Never referenced."#;
827        let warnings = check_md066(content);
828        // Should find:
829        // 1. [^missing] - orphaned reference
830        // 2. [^valid] duplicate definition
831        // 3. [^unused] - orphaned definition
832        assert_eq!(warnings.len(), 3, "Should find all issues: {warnings:?}");
833
834        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
835        assert!(
836            messages.iter().any(|m| m.contains("missing")),
837            "Should find orphaned ref"
838        );
839        assert!(
840            messages.iter().any(|m| m.contains("Duplicate")),
841            "Should find duplicate"
842        );
843        assert!(
844            messages.iter().any(|m| m.contains("unused")),
845            "Should find orphaned def"
846        );
847    }
848
849    #[test]
850    fn test_footnote_ref_at_end_of_file_no_newline() {
851        let content = "[^1]: Definition here.\n\nText with[^1]";
852        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
853        let rule = MD066FootnoteValidation;
854        let result = rule.check(&ctx).unwrap();
855        assert!(
856            result.is_empty(),
857            "Valid footnote pair without trailing newline should not warn: {result:?}"
858        );
859    }
860
861    #[test]
862    fn test_orphaned_footnote_ref_at_eof_no_newline() {
863        let content = "Text with[^missing]";
864        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
865        let rule = MD066FootnoteValidation;
866        let result = rule.check(&ctx).unwrap();
867        assert!(
868            !result.is_empty(),
869            "Orphaned ref at EOF without newline should warn: {result:?}"
870        );
871    }
872
873    #[test]
874    fn test_midline_footnote_ref_with_colon_detected_as_reference() {
875        // [^note]: mid-line is a reference followed by colon, NOT a definition
876        let content = "# Test\n\nI think [^note]: this is relevant.\n";
877        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
878        let rule = MD066FootnoteValidation;
879        let result = rule.check(&ctx).unwrap();
880        assert_eq!(
881            result.len(),
882            1,
883            "Mid-line [^note]: should be detected as undefined reference: {result:?}"
884        );
885        assert!(
886            result[0].message.contains("no corresponding definition"),
887            "Should warn about missing definition: {}",
888            result[0].message
889        );
890    }
891
892    #[test]
893    fn test_midline_footnote_ref_with_colon_matched_to_definition() {
894        // [^note]: mid-line is a reference; [^note]: at line start is the definition
895        let content = "# Test\n\nI think [^note]: this is relevant.\n\n[^note]: The actual definition.\n";
896        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
897        let rule = MD066FootnoteValidation;
898        let result = rule.check(&ctx).unwrap();
899        assert!(
900            result.is_empty(),
901            "Mid-line ref should match line-start definition: {result:?}"
902        );
903    }
904
905    #[test]
906    fn test_linestart_footnote_def_still_skipped_as_reference() {
907        // [^note]: at line start IS a definition and should NOT be counted as reference
908        let content = "# Test\n\n[^note]: The definition.\n";
909        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
910        let rule = MD066FootnoteValidation;
911        let result = rule.check(&ctx).unwrap();
912        // Should warn about orphaned definition (no reference)
913        assert_eq!(result.len(), 1, "Orphaned def should be flagged: {result:?}");
914        assert!(
915            result[0].message.contains("never referenced"),
916            "Should say 'never referenced': {}",
917            result[0].message
918        );
919    }
920
921    #[test]
922    fn test_indented_footnote_def_still_skipped() {
923        // [^note]: with 1-3 spaces indent is still a definition
924        let content = "# Test\n\n   [^note]: Indented definition.\n";
925        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
926        let rule = MD066FootnoteValidation;
927        let result = rule.check(&ctx).unwrap();
928        // Should be treated as an orphaned definition (no reference)
929        assert_eq!(result.len(), 1, "Indented def should still be detected: {result:?}");
930        assert!(
931            result[0].message.contains("never referenced"),
932            "Should say 'never referenced': {}",
933            result[0].message
934        );
935    }
936
937    #[test]
938    fn test_multiple_midline_refs_with_colons_on_same_line() {
939        // Both [^a]: and [^b]: mid-line should be counted as references
940        let content = "# Test\n\nText [^a]: and [^b]: more text.\n\n[^a]: Def A.\n[^b]: Def B.\n";
941        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
942        let rule = MD066FootnoteValidation;
943        let result = rule.check(&ctx).unwrap();
944        assert!(
945            result.is_empty(),
946            "Both mid-line refs should match their definitions: {result:?}"
947        );
948    }
949
950    #[test]
951    fn test_blockquote_footnote_def_still_skipped() {
952        // > [^note]: inside blockquote is a definition, not a reference
953        let content = "# Test\n\n> [^note]: Definition in blockquote.\n";
954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
955        let rule = MD066FootnoteValidation;
956        let result = rule.check(&ctx).unwrap();
957        // Orphaned definition (no reference uses it)
958        assert_eq!(
959            result.len(),
960            1,
961            "Blockquote def should be detected as orphaned: {result:?}"
962        );
963        assert!(
964            result[0].message.contains("never referenced"),
965            "Should say 'never referenced': {}",
966            result[0].message
967        );
968    }
969
970    #[test]
971    fn test_list_item_footnote_ref_with_colon_is_reference() {
972        // - [^note]: inside a list item is a reference, not a definition
973        let content = "# Test\n\n- [^note]: list item text.\n\n[^note]: The actual definition.\n";
974        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
975        let rule = MD066FootnoteValidation;
976        let result = rule.check(&ctx).unwrap();
977        assert!(
978            result.is_empty(),
979            "List item [^note]: should be a ref matching the definition: {result:?}"
980        );
981    }
982
983    // ==================== Warning position tests ====================
984
985    #[test]
986    fn test_orphaned_reference_column_position() {
987        // "This references[^missing] a non-existent footnote."
988        //  column 16:     ^
989        let content = "This references[^missing] a non-existent footnote.";
990        let warnings = check_md066(content);
991        assert_eq!(warnings.len(), 1);
992        assert_eq!(warnings[0].line, 1);
993        assert_eq!(warnings[0].column, 16, "Column should point to '[^missing]'");
994        // "[^missing]" is 10 chars, so end_column = 16 + 10 = 26
995        assert_eq!(warnings[0].end_column, 26);
996    }
997
998    #[test]
999    fn test_orphaned_definition_column_position() {
1000        // "[^unused]: Never referenced." starts at column 1
1001        let content = "Regular text.\n\n[^unused]: Never referenced.";
1002        let warnings = check_md066(content);
1003        assert_eq!(warnings.len(), 1);
1004        assert_eq!(warnings[0].line, 3);
1005        assert_eq!(warnings[0].column, 1, "Definition at start of line");
1006        // "[^unused]:" is 10 chars
1007        assert_eq!(warnings[0].end_column, 11);
1008    }
1009
1010    #[test]
1011    fn test_duplicate_definition_column_position() {
1012        let content = "Reference[^1].\n\n[^1]: First.\n[^1]: Second.";
1013        let warnings = check_md066(content);
1014        assert_eq!(warnings.len(), 1);
1015        assert_eq!(warnings[0].line, 4);
1016        assert_eq!(warnings[0].column, 1);
1017        // "[^1]:" is 5 chars
1018        assert_eq!(warnings[0].end_column, 6);
1019    }
1020
1021    #[test]
1022    fn test_orphaned_definition_in_blockquote_column() {
1023        // "> [^unused]: Never referenced."
1024        //    ^ column 3 (after "> ")
1025        let content = "> Some text.\n>\n> [^unused]: Never referenced.";
1026        let warnings = check_md066(content);
1027        assert_eq!(warnings.len(), 1);
1028        assert_eq!(warnings[0].line, 3);
1029        assert_eq!(warnings[0].column, 3, "Should point past blockquote prefix");
1030    }
1031
1032    #[test]
1033    fn test_orphaned_reference_after_multibyte_chars() {
1034        // "日本語テキスト[^ref1] has no def."
1035        // "日本語テキスト" = 7 characters (each is 3 bytes in UTF-8)
1036        // Column should be 8 (character-based), not 22 (byte-based)
1037        let content = "日本語テキスト[^ref1] has no def.";
1038        let warnings = check_md066(content);
1039        assert_eq!(warnings.len(), 1);
1040        assert_eq!(
1041            warnings[0].column, 8,
1042            "Column should be character-based, not byte-based"
1043        );
1044        // "[^ref1]" = 7 chars
1045        assert_eq!(warnings[0].end_column, 15);
1046    }
1047
1048    #[test]
1049    fn test_orphaned_definition_with_indentation_column() {
1050        // "   [^note]:" — column should point to [^note]:, not the leading spaces
1051        let content = "# Heading\n\n   [^note]: Indented and orphaned.";
1052        let warnings = check_md066(content);
1053        assert_eq!(warnings.len(), 1);
1054        // "[^note]:" starts at column 4 (after 3 spaces)
1055        assert_eq!(warnings[0].column, 4);
1056        // "[^note]:" is 8 chars, end_column = 4 + 8 = 12
1057        assert_eq!(warnings[0].end_column, 12);
1058    }
1059
1060    #[test]
1061    fn test_orphaned_ref_end_column_uses_original_case() {
1062        // ref_id is stored lowercased, but end_column should reflect the actual source text
1063        let content = "Text with [^NOTE] here.";
1064        let warnings = check_md066(content);
1065        assert_eq!(warnings.len(), 1);
1066        // "Text with " = 10 chars, so [^NOTE] starts at column 11
1067        assert_eq!(warnings[0].column, 11);
1068        // "[^NOTE]" = 7 chars, end_column = 11 + 7 = 18
1069        assert_eq!(warnings[0].end_column, 18);
1070    }
1071}