Skip to main content

rumdl_lib/rules/
md066_footnote_validation.rs

1use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use regex::Regex;
3use std::collections::{HashMap, HashSet};
4use std::sync::LazyLock;
5
6/// Pattern to match footnote definitions: [^id]: content
7/// Matches at start of line, with 0-3 leading spaces, caret in brackets
8/// Also handles definitions inside blockquotes (after stripping > prefixes)
9pub static FOOTNOTE_DEF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[ ]{0,3}\[\^([^\]]+)\]:").unwrap());
10
11/// Pattern to match footnote references in text: [^id]
12/// Callers must manually check that the match is NOT followed by `:` (which would make it a definition)
13pub static FOOTNOTE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^([^\]]+)\]").unwrap());
14
/// Returns `line` with its leading blockquote markers removed.
///
/// The scan walks over a leading run made up solely of `>` and space
/// characters. Every `>` (together with at most one space directly after it)
/// moves the start of the returned slice forward, so nested quotes such as
/// `> > > ` or `>>` are removed in full. Spaces that are *not* followed by a
/// further `>` are kept, which preserves the indentation of the quoted content.
/// A line without any `>` in its leading run is returned unchanged.
pub fn strip_blockquote_prefix(line: &str) -> &str {
    // Only ASCII bytes (`>` and space) are ever consumed, so every offset
    // recorded below is guaranteed to fall on a UTF-8 character boundary.
    let bytes = line.as_bytes();
    let mut cursor = 0;
    let mut content_start = 0;

    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'>' => {
                cursor += 1;
                // A single space directly after the marker belongs to the marker.
                if bytes.get(cursor) == Some(&b' ') {
                    cursor += 1;
                }
                content_start = cursor;
            }
            // Spaces are skipped tentatively; they only count as prefix if
            // another `>` follows.
            b' ' => cursor += 1,
            _ => break,
        }
    }

    &line[content_start..]
}
45
46/// Find the (column, end_column) of a footnote definition marker `[^id]:` on a line.
47/// Returns 1-indexed column positions pointing to `[^id]:`, not leading whitespace.
48/// Handles blockquote prefixes and uses character counting for multi-byte support.
49pub fn footnote_def_position(line: &str) -> (usize, usize) {
50    let stripped = strip_blockquote_prefix(line);
51    if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(stripped) {
52        let prefix_chars = line.chars().count() - stripped.chars().count();
53        let id_match = caps.get(1).unwrap();
54        // `[^` is always 2 bytes before the ID capture group
55        let bracket_byte_pos = id_match.start() - 2;
56        let chars_before_bracket = stripped[..bracket_byte_pos].chars().count();
57        let full_match_end = caps.get(0).unwrap().end();
58        let marker_chars = stripped[bracket_byte_pos..full_match_end].chars().count();
59        (
60            prefix_chars + chars_before_bracket + 1,
61            prefix_chars + chars_before_bracket + marker_chars + 1,
62        )
63    } else {
64        (1, 1)
65    }
66}
67
/// Rule MD066: footnote validation.
///
/// Cross-checks footnote references against footnote definitions and reports:
/// - references (`[^id]`) for which no definition exists,
/// - definitions (`[^id]: text`) that no reference points to,
/// - identifiers that are defined more than once.
///
/// Identifiers are compared case-insensitively. Footnotes are a widespread
/// markdown extension rather than part of CommonMark:
/// - a reference is written `[^identifier]` inside running text;
/// - a definition is written `[^identifier]: definition text` and may continue
///   on following indented lines.
///
/// The rule carries no state or configuration.
///
/// ## Examples
///
/// **Valid:**
/// ```markdown
/// This has a footnote[^1] that is properly defined.
///
/// [^1]: This is the footnote content.
/// ```
///
/// **Invalid - reference without a definition:**
/// ```markdown
/// This references[^missing] a footnote that doesn't exist.
/// ```
///
/// **Invalid - definition without a reference:**
/// ```markdown
/// [^unused]: This footnote is defined but never referenced.
/// ```
#[derive(Clone, Debug, Default)]
pub struct MD066FootnoteValidation;
98
99impl MD066FootnoteValidation {
100    pub fn new() -> Self {
101        Self
102    }
103}
104
105impl Rule for MD066FootnoteValidation {
106    fn name(&self) -> &'static str {
107        "MD066"
108    }
109
110    fn description(&self) -> &'static str {
111        "Footnote validation"
112    }
113
114    fn category(&self) -> RuleCategory {
115        RuleCategory::Other
116    }
117
118    fn fix_capability(&self) -> FixCapability {
119        FixCapability::Unfixable
120    }
121
122    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
123        ctx.content.is_empty() || !ctx.content.contains("[^")
124    }
125
126    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
127        let mut warnings = Vec::new();
128
129        // Early exit if no footnotes at all
130        if ctx.footnote_refs.is_empty() && !ctx.content.contains("[^") {
131            return Ok(warnings);
132        }
133
134        // Collect all footnote references (id is WITHOUT the ^ prefix)
135        // Map from id -> list of (line, byte_offset) for each reference
136        // Note: pulldown-cmark only finds references when definitions exist,
137        // so we need to parse references directly to find orphaned ones
138        let mut references: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
139
140        // First, use pulldown-cmark's detected references (when definitions exist)
141        for footnote_ref in &ctx.footnote_refs {
142            // Skip if in code block, frontmatter, HTML comment, or HTML block
143            if ctx.line_info(footnote_ref.line).is_some_and(|info| {
144                info.in_code_block
145                    || info.in_front_matter
146                    || info.in_html_comment
147                    || info.in_mdx_comment
148                    || info.in_html_block
149            }) {
150                continue;
151            }
152            references
153                .entry(footnote_ref.id.to_lowercase())
154                .or_default()
155                .push((footnote_ref.line, footnote_ref.byte_offset));
156        }
157
158        // Also parse references directly to find orphaned ones (without definitions)
159        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
160            // Skip if in code block, frontmatter, HTML comment, or HTML block
161            if line_info.in_code_block
162                || line_info.in_front_matter
163                || line_info.in_html_comment
164                || line_info.in_mdx_comment
165                || line_info.in_html_block
166            {
167                continue;
168            }
169
170            let line = line_info.content(ctx.content);
171            let line_num = line_idx + 1; // 1-indexed
172
173            for caps in FOOTNOTE_REF_PATTERN.captures_iter(line) {
174                if let Some(id_match) = caps.get(1) {
175                    // Skip if this is a footnote definition (at line start with 0-3 spaces indent)
176                    // Also handle blockquote prefixes (e.g., "> [^id]:")
177                    let full_match = caps.get(0).unwrap();
178                    if line.as_bytes().get(full_match.end()) == Some(&b':') {
179                        let before_match = &line[..full_match.start()];
180                        if before_match.chars().all(|c| c == ' ' || c == '>') {
181                            continue;
182                        }
183                    }
184
185                    let id = id_match.as_str().to_lowercase();
186
187                    // Check if this match is inside a code span
188                    let match_start = full_match.start();
189                    let byte_offset = line_info.byte_offset + match_start;
190
191                    let in_code_span = ctx.is_in_code_span_byte(byte_offset);
192
193                    if !in_code_span {
194                        // Only add if not already found (avoid duplicates with pulldown-cmark)
195                        references.entry(id).or_default().push((line_num, byte_offset));
196                    }
197                }
198            }
199        }
200
201        // Deduplicate references (pulldown-cmark and regex might find the same ones)
202        for occurrences in references.values_mut() {
203            occurrences.sort();
204            occurrences.dedup();
205        }
206
207        // Collect footnote definitions by parsing directly from content
208        // Footnote definitions: [^id]: content (NOT in reference_defs which expects URLs)
209        // Map from id (lowercase) -> list of (line, byte_offset) for duplicate detection
210        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
211        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
212            // Skip if in code block, frontmatter, HTML comment, or HTML block
213            if line_info.in_code_block
214                || line_info.in_front_matter
215                || line_info.in_html_comment
216                || line_info.in_mdx_comment
217                || line_info.in_html_block
218            {
219                continue;
220            }
221
222            let line = line_info.content(ctx.content);
223            // Strip blockquote prefixes to handle definitions inside blockquotes
224            let line_stripped = strip_blockquote_prefix(line);
225
226            if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(line_stripped)
227                && let Some(id_match) = caps.get(1)
228            {
229                let id = id_match.as_str().to_lowercase();
230                let line_num = line_idx + 1; // 1-indexed
231                definitions
232                    .entry(id)
233                    .or_default()
234                    .push((line_num, line_info.byte_offset));
235            }
236        }
237
238        // Check for duplicate definitions
239        for (def_id, occurrences) in &definitions {
240            if occurrences.len() > 1 {
241                // Report all duplicate definitions after the first one
242                for (line, _byte_offset) in &occurrences[1..] {
243                    let (col, end_col) = ctx
244                        .lines
245                        .get(*line - 1)
246                        .map(|li| footnote_def_position(li.content(ctx.content)))
247                        .unwrap_or((1, 1));
248                    warnings.push(LintWarning {
249                        rule_name: Some(self.name().to_string()),
250                        line: *line,
251                        column: col,
252                        end_line: *line,
253                        end_column: end_col,
254                        message: format!(
255                            "Duplicate footnote definition '[^{def_id}]' (first defined on line {})",
256                            occurrences[0].0
257                        ),
258                        severity: Severity::Error,
259                        fix: None,
260                    });
261                }
262            }
263        }
264
265        // Check for orphaned references (references without definitions)
266        let defined_ids: HashSet<&String> = definitions.keys().collect();
267        for (ref_id, occurrences) in &references {
268            if !defined_ids.contains(ref_id) {
269                // Report the first occurrence of each undefined reference
270                let (line, byte_offset) = occurrences[0];
271                // Compute character-based column from byte offset within the line.
272                // Find the actual marker text in the source to get the real length,
273                // since ref_id is lowercased and may differ from the original.
274                let (col, end_col) = if let Some(line_info) = ctx.lines.get(line - 1) {
275                    let line_content = line_info.content(ctx.content);
276                    let byte_pos = byte_offset.saturating_sub(line_info.byte_offset);
277                    let char_col = line_content.get(..byte_pos).map(|s| s.chars().count()).unwrap_or(0);
278                    // Find the actual [^...] marker in the source at this position
279                    let marker_chars = line_content
280                        .get(byte_pos..)
281                        .and_then(|rest| rest.find(']'))
282                        .map(|end| line_content[byte_pos..byte_pos + end + 1].chars().count())
283                        .unwrap_or_else(|| format!("[^{ref_id}]").chars().count());
284                    (char_col + 1, char_col + marker_chars + 1)
285                } else {
286                    (1, 1)
287                };
288                warnings.push(LintWarning {
289                    rule_name: Some(self.name().to_string()),
290                    line,
291                    column: col,
292                    end_line: line,
293                    end_column: end_col,
294                    message: format!("Footnote reference '[^{ref_id}]' has no corresponding definition"),
295                    severity: Severity::Error,
296                    fix: None,
297                });
298            }
299        }
300
301        // Check for orphaned definitions (definitions without references)
302        let referenced_ids: HashSet<&String> = references.keys().collect();
303        for (def_id, occurrences) in &definitions {
304            if !referenced_ids.contains(def_id) {
305                // Report the first definition location
306                let (line, _byte_offset) = occurrences[0];
307                let (col, end_col) = ctx
308                    .lines
309                    .get(line - 1)
310                    .map(|li| footnote_def_position(li.content(ctx.content)))
311                    .unwrap_or((1, 1));
312                warnings.push(LintWarning {
313                    rule_name: Some(self.name().to_string()),
314                    line,
315                    column: col,
316                    end_line: line,
317                    end_column: end_col,
318                    message: format!("Footnote definition '[^{def_id}]' is never referenced"),
319                    severity: Severity::Error,
320                    fix: None,
321                });
322            }
323        }
324
325        // Sort warnings by line number for consistent output
326        warnings.sort_by_key(|w| w.line);
327
328        Ok(warnings)
329    }
330
331    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
332        // No automatic fix - user must decide what to do with orphaned footnotes
333        Ok(ctx.content.to_string())
334    }
335
336    fn as_any(&self) -> &dyn std::any::Any {
337        self
338    }
339
340    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
341    where
342        Self: Sized,
343    {
344        Box::new(MD066FootnoteValidation)
345    }
346}
347
348#[cfg(test)]
349mod tests {
350    use super::*;
351    use crate::lint_context::LintContext;
352
353    fn check_md066(content: &str) -> Vec<LintWarning> {
354        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
355        MD066FootnoteValidation::new().check(&ctx).unwrap()
356    }
357
358    // ==================== Valid cases ====================
359
360    #[test]
361    fn test_valid_single_footnote() {
362        let content = "This has a footnote[^1].\n\n[^1]: The footnote content.";
363        let warnings = check_md066(content);
364        assert!(warnings.is_empty(), "Valid footnote should not warn: {warnings:?}");
365    }
366
367    #[test]
368    fn test_valid_multiple_footnotes() {
369        let content = r#"First footnote[^1] and second[^2].
370
371[^1]: First definition.
372[^2]: Second definition."#;
373        let warnings = check_md066(content);
374        assert!(warnings.is_empty(), "Valid footnotes should not warn: {warnings:?}");
375    }
376
377    #[test]
378    fn test_valid_named_footnotes() {
379        let content = r#"See the note[^note] and warning[^warning].
380
381[^note]: This is a note.
382[^warning]: This is a warning."#;
383        let warnings = check_md066(content);
384        assert!(warnings.is_empty(), "Named footnotes should not warn: {warnings:?}");
385    }
386
387    #[test]
388    fn test_valid_footnote_used_multiple_times() {
389        let content = r#"First[^1] and again[^1] and third[^1].
390
391[^1]: Used multiple times."#;
392        let warnings = check_md066(content);
393        assert!(warnings.is_empty(), "Reused footnote should not warn: {warnings:?}");
394    }
395
396    #[test]
397    fn test_valid_case_insensitive_matching() {
398        let content = r#"Reference[^NOTE].
399
400[^note]: Definition with different case."#;
401        let warnings = check_md066(content);
402        assert!(
403            warnings.is_empty(),
404            "Case-insensitive matching should work: {warnings:?}"
405        );
406    }
407
408    #[test]
409    fn test_no_footnotes_at_all() {
410        let content = "Just regular markdown without any footnotes.";
411        let warnings = check_md066(content);
412        assert!(warnings.is_empty(), "No footnotes should not warn");
413    }
414
415    // ==================== Orphaned references ====================
416
417    #[test]
418    fn test_orphaned_reference_single() {
419        let content = "This references[^missing] a non-existent footnote.";
420        let warnings = check_md066(content);
421        assert_eq!(warnings.len(), 1, "Should detect orphaned reference");
422        assert!(warnings[0].message.contains("missing"));
423        assert!(warnings[0].message.contains("no corresponding definition"));
424    }
425
426    #[test]
427    fn test_orphaned_reference_multiple() {
428        let content = r#"First[^a], second[^b], third[^c].
429
430[^b]: Only b is defined."#;
431        let warnings = check_md066(content);
432        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned references: {warnings:?}");
433        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
434        assert!(messages.iter().any(|m| m.contains("[^a]")));
435        assert!(messages.iter().any(|m| m.contains("[^c]")));
436    }
437
438    #[test]
439    fn test_orphaned_reference_reports_first_occurrence() {
440        let content = "First[^missing] and again[^missing] and third[^missing].";
441        let warnings = check_md066(content);
442        // Should only report once per unique ID
443        assert_eq!(warnings.len(), 1, "Should report each orphaned ID once");
444        assert!(warnings[0].message.contains("missing"));
445    }
446
447    // ==================== Orphaned definitions ====================
448
449    #[test]
450    fn test_orphaned_definition_single() {
451        let content = "Regular text.\n\n[^unused]: This is never referenced.";
452        let warnings = check_md066(content);
453        assert_eq!(warnings.len(), 1, "Should detect orphaned definition");
454        assert!(warnings[0].message.contains("unused"));
455        assert!(warnings[0].message.contains("never referenced"));
456    }
457
458    #[test]
459    fn test_orphaned_definition_multiple() {
460        let content = r#"Using one[^used].
461
462[^used]: This is used.
463[^orphan1]: Never used.
464[^orphan2]: Also never used."#;
465        let warnings = check_md066(content);
466        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned definitions: {warnings:?}");
467        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
468        assert!(messages.iter().any(|m| m.contains("orphan1")));
469        assert!(messages.iter().any(|m| m.contains("orphan2")));
470    }
471
472    // ==================== Mixed cases ====================
473
474    #[test]
475    fn test_both_orphaned_reference_and_definition() {
476        let content = r#"Reference[^missing].
477
478[^unused]: Never referenced."#;
479        let warnings = check_md066(content);
480        assert_eq!(
481            warnings.len(),
482            2,
483            "Should detect both orphaned ref and def: {warnings:?}"
484        );
485        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
486        assert!(
487            messages.iter().any(|m| m.contains("missing")),
488            "Should find missing ref"
489        );
490        assert!(messages.iter().any(|m| m.contains("unused")), "Should find unused def");
491    }
492
493    // ==================== Code block handling ====================
494
495    #[test]
496    fn test_footnote_in_code_block_ignored() {
497        let content = r#"```
498[^1]: This is in a code block
499```
500
501Regular text without footnotes."#;
502        let warnings = check_md066(content);
503        assert!(warnings.is_empty(), "Footnotes in code blocks should be ignored");
504    }
505
506    #[test]
507    fn test_footnote_reference_in_code_span_ignored() {
508        // Note: This depends on whether pulldown-cmark parses footnotes inside code spans
509        // If it does, we should skip them
510        let content = r#"Use `[^1]` syntax for footnotes.
511
512[^1]: This definition exists but the reference in backticks shouldn't count."#;
513        // This is tricky - if pulldown-cmark doesn't parse [^1] in backticks as a footnote ref,
514        // then the definition is orphaned
515        let warnings = check_md066(content);
516        // Expectation depends on parser behavior - test the actual behavior
517        assert_eq!(
518            warnings.len(),
519            1,
520            "Code span reference shouldn't count, definition is orphaned"
521        );
522        assert!(warnings[0].message.contains("never referenced"));
523    }
524
525    // ==================== Frontmatter handling ====================
526
527    #[test]
528    fn test_footnote_in_frontmatter_ignored() {
529        let content = r#"---
530note: "[^1]: yaml value"
531---
532
533Regular content."#;
534        let warnings = check_md066(content);
535        assert!(
536            warnings.is_empty(),
537            "Footnotes in frontmatter should be ignored: {warnings:?}"
538        );
539    }
540
541    // ==================== Edge cases ====================
542
543    #[test]
544    fn test_empty_document() {
545        let warnings = check_md066("");
546        assert!(warnings.is_empty());
547    }
548
549    #[test]
550    fn test_footnote_with_special_characters() {
551        let content = r#"Reference[^my-note_1].
552
553[^my-note_1]: Definition with special chars in ID."#;
554        let warnings = check_md066(content);
555        assert!(
556            warnings.is_empty(),
557            "Special characters in footnote ID should work: {warnings:?}"
558        );
559    }
560
561    #[test]
562    fn test_multiline_footnote_definition() {
563        let content = r#"Reference[^long].
564
565[^long]: This is a long footnote
566    that spans multiple lines
567    with proper indentation."#;
568        let warnings = check_md066(content);
569        assert!(
570            warnings.is_empty(),
571            "Multiline footnote definitions should work: {warnings:?}"
572        );
573    }
574
575    #[test]
576    fn test_footnote_at_end_of_sentence() {
577        let content = r#"This ends with a footnote[^1].
578
579[^1]: End of sentence footnote."#;
580        let warnings = check_md066(content);
581        assert!(warnings.is_empty());
582    }
583
584    #[test]
585    fn test_footnote_mid_sentence() {
586        let content = r#"Some text[^1] continues here.
587
588[^1]: Mid-sentence footnote."#;
589        let warnings = check_md066(content);
590        assert!(warnings.is_empty());
591    }
592
593    #[test]
594    fn test_adjacent_footnotes() {
595        let content = r#"Text[^1][^2] with adjacent footnotes.
596
597[^1]: First.
598[^2]: Second."#;
599        let warnings = check_md066(content);
600        assert!(warnings.is_empty(), "Adjacent footnotes should work: {warnings:?}");
601    }
602
603    #[test]
604    fn test_footnote_only_definitions_no_references() {
605        let content = r#"[^1]: First orphan.
606[^2]: Second orphan.
607[^3]: Third orphan."#;
608        let warnings = check_md066(content);
609        assert_eq!(warnings.len(), 3, "All definitions should be flagged: {warnings:?}");
610    }
611
612    #[test]
613    fn test_footnote_only_references_no_definitions() {
614        let content = "Text[^1] and[^2] and[^3].";
615        let warnings = check_md066(content);
616        assert_eq!(warnings.len(), 3, "All references should be flagged: {warnings:?}");
617    }
618
619    // ==================== Blockquote handling ====================
620
621    #[test]
622    fn test_footnote_in_blockquote_valid() {
623        let content = r#"> This has a footnote[^1].
624>
625> [^1]: Definition inside blockquote."#;
626        let warnings = check_md066(content);
627        assert!(
628            warnings.is_empty(),
629            "Footnotes inside blockquotes should be validated: {warnings:?}"
630        );
631    }
632
633    #[test]
634    fn test_footnote_in_nested_blockquote() {
635        let content = r#"> > Nested blockquote with footnote[^nested].
636> >
637> > [^nested]: Definition in nested blockquote."#;
638        let warnings = check_md066(content);
639        assert!(
640            warnings.is_empty(),
641            "Footnotes in nested blockquotes should work: {warnings:?}"
642        );
643    }
644
645    #[test]
646    fn test_footnote_blockquote_orphaned_reference() {
647        let content = r#"> This has an orphaned footnote[^missing].
648>
649> No definition here."#;
650        let warnings = check_md066(content);
651        assert_eq!(warnings.len(), 1, "Should detect orphaned ref in blockquote");
652        assert!(warnings[0].message.contains("missing"));
653    }
654
655    #[test]
656    fn test_footnote_blockquote_orphaned_definition() {
657        let content = r#"> Some text.
658>
659> [^unused]: Never referenced in blockquote."#;
660        let warnings = check_md066(content);
661        assert_eq!(warnings.len(), 1, "Should detect orphaned def in blockquote");
662        assert!(warnings[0].message.contains("unused"));
663    }
664
665    // ==================== Duplicate definitions ====================
666
667    #[test]
668    fn test_duplicate_definition_detected() {
669        let content = r#"Reference[^1].
670
671[^1]: First definition.
672[^1]: Second definition (duplicate)."#;
673        let warnings = check_md066(content);
674        assert_eq!(warnings.len(), 1, "Should detect duplicate definition: {warnings:?}");
675        assert!(warnings[0].message.contains("Duplicate"));
676        assert!(warnings[0].message.contains("[^1]"));
677    }
678
679    #[test]
680    fn test_multiple_duplicate_definitions() {
681        let content = r#"Reference[^dup].
682
683[^dup]: First.
684[^dup]: Second.
685[^dup]: Third."#;
686        let warnings = check_md066(content);
687        assert_eq!(warnings.len(), 2, "Should detect 2 duplicate definitions: {warnings:?}");
688        assert!(warnings.iter().all(|w| w.message.contains("Duplicate")));
689    }
690
691    #[test]
692    fn test_duplicate_definition_case_insensitive() {
693        let content = r#"Reference[^Note].
694
695[^note]: Lowercase definition.
696[^NOTE]: Uppercase definition (duplicate)."#;
697        let warnings = check_md066(content);
698        assert_eq!(warnings.len(), 1, "Case-insensitive duplicate detection: {warnings:?}");
699        assert!(warnings[0].message.contains("Duplicate"));
700    }
701
702    // ==================== HTML comment handling ====================
703
704    #[test]
705    fn test_footnote_reference_in_html_comment_ignored() {
706        let content = r#"<!-- This has [^1] in a comment -->
707
708Regular text without footnotes."#;
709        let warnings = check_md066(content);
710        assert!(
711            warnings.is_empty(),
712            "Footnote refs in HTML comments should be ignored: {warnings:?}"
713        );
714    }
715
716    #[test]
717    fn test_footnote_definition_in_html_comment_ignored() {
718        let content = r#"<!--
719[^1]: Definition in HTML comment
720-->
721
722Regular text."#;
723        let warnings = check_md066(content);
724        assert!(
725            warnings.is_empty(),
726            "Footnote defs in HTML comments should be ignored: {warnings:?}"
727        );
728    }
729
730    #[test]
731    fn test_footnote_outside_html_comment_still_validated() {
732        let content = r#"<!-- Just a comment -->
733
734Text with footnote[^1].
735
736[^1]: Valid definition outside comment."#;
737        let warnings = check_md066(content);
738        assert!(warnings.is_empty(), "Valid footnote outside comment: {warnings:?}");
739    }
740
741    #[test]
742    fn test_orphaned_ref_not_saved_by_def_in_comment() {
743        let content = r#"Text with orphaned[^missing].
744
745<!--
746[^missing]: This definition is in a comment, shouldn't count
747-->"#;
748        let warnings = check_md066(content);
749        assert_eq!(warnings.len(), 1, "Def in comment shouldn't satisfy ref: {warnings:?}");
750        assert!(warnings[0].message.contains("no corresponding definition"));
751    }
752
753    // ==================== HTML block handling ====================
754
755    #[test]
756    fn test_footnote_in_html_block_ignored() {
757        // Regex character classes like [^abc] should be ignored in HTML blocks
758        let content = r#"<table>
759<tr>
760<td><code>[^abc]</code></td>
761<td>Negated character class</td>
762</tr>
763</table>
764
765Regular markdown text."#;
766        let warnings = check_md066(content);
767        assert!(
768            warnings.is_empty(),
769            "Footnote-like patterns in HTML blocks should be ignored: {warnings:?}"
770        );
771    }
772
773    #[test]
774    fn test_footnote_in_html_table_ignored() {
775        let content = r#"| Header |
776|--------|
777| Cell   |
778
779<div>
780<p>This has <code>[^0-9]</code> regex pattern</p>
781</div>
782
783Normal text."#;
784        let warnings = check_md066(content);
785        assert!(
786            warnings.is_empty(),
787            "Regex patterns in HTML div should be ignored: {warnings:?}"
788        );
789    }
790
791    #[test]
792    fn test_real_footnote_outside_html_block() {
793        let content = r#"<div>
794Some HTML content
795</div>
796
797Text with real footnote[^1].
798
799[^1]: This is a real footnote definition."#;
800        let warnings = check_md066(content);
801        assert!(
802            warnings.is_empty(),
803            "Real footnote outside HTML block should work: {warnings:?}"
804        );
805    }
806
807    // ==================== Combined edge cases ====================
808
809    #[test]
810    fn test_blockquote_with_duplicate_definitions() {
811        let content = r#"> Text[^1].
812>
813> [^1]: First.
814> [^1]: Duplicate in blockquote."#;
815        let warnings = check_md066(content);
816        assert_eq!(warnings.len(), 1, "Should detect duplicate in blockquote: {warnings:?}");
817        assert!(warnings[0].message.contains("Duplicate"));
818    }
819
820    #[test]
821    fn test_all_enhancement_features_together() {
822        let content = r#"<!-- Comment with [^comment] -->
823
824Regular text[^valid] and[^missing].
825
826> Blockquote text[^bq].
827>
828> [^bq]: Blockquote definition.
829
830[^valid]: Valid definition.
831[^valid]: Duplicate definition.
832[^unused]: Never referenced."#;
833        let warnings = check_md066(content);
834        // Should find:
835        // 1. [^missing] - orphaned reference
836        // 2. [^valid] duplicate definition
837        // 3. [^unused] - orphaned definition
838        assert_eq!(warnings.len(), 3, "Should find all issues: {warnings:?}");
839
840        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
841        assert!(
842            messages.iter().any(|m| m.contains("missing")),
843            "Should find orphaned ref"
844        );
845        assert!(
846            messages.iter().any(|m| m.contains("Duplicate")),
847            "Should find duplicate"
848        );
849        assert!(
850            messages.iter().any(|m| m.contains("unused")),
851            "Should find orphaned def"
852        );
853    }
854
855    #[test]
856    fn test_footnote_ref_at_end_of_file_no_newline() {
857        let content = "[^1]: Definition here.\n\nText with[^1]";
858        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
859        let rule = MD066FootnoteValidation;
860        let result = rule.check(&ctx).unwrap();
861        assert!(
862            result.is_empty(),
863            "Valid footnote pair without trailing newline should not warn: {result:?}"
864        );
865    }
866
867    #[test]
868    fn test_orphaned_footnote_ref_at_eof_no_newline() {
869        let content = "Text with[^missing]";
870        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
871        let rule = MD066FootnoteValidation;
872        let result = rule.check(&ctx).unwrap();
873        assert!(
874            !result.is_empty(),
875            "Orphaned ref at EOF without newline should warn: {result:?}"
876        );
877    }
878
879    #[test]
880    fn test_midline_footnote_ref_with_colon_detected_as_reference() {
881        // [^note]: mid-line is a reference followed by colon, NOT a definition
882        let content = "# Test\n\nI think [^note]: this is relevant.\n";
883        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
884        let rule = MD066FootnoteValidation;
885        let result = rule.check(&ctx).unwrap();
886        assert_eq!(
887            result.len(),
888            1,
889            "Mid-line [^note]: should be detected as undefined reference: {result:?}"
890        );
891        assert!(
892            result[0].message.contains("no corresponding definition"),
893            "Should warn about missing definition: {}",
894            result[0].message
895        );
896    }
897
898    #[test]
899    fn test_midline_footnote_ref_with_colon_matched_to_definition() {
900        // [^note]: mid-line is a reference; [^note]: at line start is the definition
901        let content = "# Test\n\nI think [^note]: this is relevant.\n\n[^note]: The actual definition.\n";
902        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
903        let rule = MD066FootnoteValidation;
904        let result = rule.check(&ctx).unwrap();
905        assert!(
906            result.is_empty(),
907            "Mid-line ref should match line-start definition: {result:?}"
908        );
909    }
910
911    #[test]
912    fn test_linestart_footnote_def_still_skipped_as_reference() {
913        // [^note]: at line start IS a definition and should NOT be counted as reference
914        let content = "# Test\n\n[^note]: The definition.\n";
915        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
916        let rule = MD066FootnoteValidation;
917        let result = rule.check(&ctx).unwrap();
918        // Should warn about orphaned definition (no reference)
919        assert_eq!(result.len(), 1, "Orphaned def should be flagged: {result:?}");
920        assert!(
921            result[0].message.contains("never referenced"),
922            "Should say 'never referenced': {}",
923            result[0].message
924        );
925    }
926
927    #[test]
928    fn test_indented_footnote_def_still_skipped() {
929        // [^note]: with 1-3 spaces indent is still a definition
930        let content = "# Test\n\n   [^note]: Indented definition.\n";
931        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
932        let rule = MD066FootnoteValidation;
933        let result = rule.check(&ctx).unwrap();
934        // Should be treated as an orphaned definition (no reference)
935        assert_eq!(result.len(), 1, "Indented def should still be detected: {result:?}");
936        assert!(
937            result[0].message.contains("never referenced"),
938            "Should say 'never referenced': {}",
939            result[0].message
940        );
941    }
942
943    #[test]
944    fn test_multiple_midline_refs_with_colons_on_same_line() {
945        // Both [^a]: and [^b]: mid-line should be counted as references
946        let content = "# Test\n\nText [^a]: and [^b]: more text.\n\n[^a]: Def A.\n[^b]: Def B.\n";
947        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
948        let rule = MD066FootnoteValidation;
949        let result = rule.check(&ctx).unwrap();
950        assert!(
951            result.is_empty(),
952            "Both mid-line refs should match their definitions: {result:?}"
953        );
954    }
955
956    #[test]
957    fn test_blockquote_footnote_def_still_skipped() {
958        // > [^note]: inside blockquote is a definition, not a reference
959        let content = "# Test\n\n> [^note]: Definition in blockquote.\n";
960        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
961        let rule = MD066FootnoteValidation;
962        let result = rule.check(&ctx).unwrap();
963        // Orphaned definition (no reference uses it)
964        assert_eq!(
965            result.len(),
966            1,
967            "Blockquote def should be detected as orphaned: {result:?}"
968        );
969        assert!(
970            result[0].message.contains("never referenced"),
971            "Should say 'never referenced': {}",
972            result[0].message
973        );
974    }
975
976    #[test]
977    fn test_list_item_footnote_ref_with_colon_is_reference() {
978        // - [^note]: inside a list item is a reference, not a definition
979        let content = "# Test\n\n- [^note]: list item text.\n\n[^note]: The actual definition.\n";
980        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
981        let rule = MD066FootnoteValidation;
982        let result = rule.check(&ctx).unwrap();
983        assert!(
984            result.is_empty(),
985            "List item [^note]: should be a ref matching the definition: {result:?}"
986        );
987    }
988
989    // ==================== Warning position tests ====================
990
991    #[test]
992    fn test_orphaned_reference_column_position() {
993        // "This references[^missing] a non-existent footnote."
994        //  column 16:     ^
995        let content = "This references[^missing] a non-existent footnote.";
996        let warnings = check_md066(content);
997        assert_eq!(warnings.len(), 1);
998        assert_eq!(warnings[0].line, 1);
999        assert_eq!(warnings[0].column, 16, "Column should point to '[^missing]'");
1000        // "[^missing]" is 10 chars, so end_column = 16 + 10 = 26
1001        assert_eq!(warnings[0].end_column, 26);
1002    }
1003
1004    #[test]
1005    fn test_orphaned_definition_column_position() {
1006        // "[^unused]: Never referenced." starts at column 1
1007        let content = "Regular text.\n\n[^unused]: Never referenced.";
1008        let warnings = check_md066(content);
1009        assert_eq!(warnings.len(), 1);
1010        assert_eq!(warnings[0].line, 3);
1011        assert_eq!(warnings[0].column, 1, "Definition at start of line");
1012        // "[^unused]:" is 10 chars
1013        assert_eq!(warnings[0].end_column, 11);
1014    }
1015
1016    #[test]
1017    fn test_duplicate_definition_column_position() {
1018        let content = "Reference[^1].\n\n[^1]: First.\n[^1]: Second.";
1019        let warnings = check_md066(content);
1020        assert_eq!(warnings.len(), 1);
1021        assert_eq!(warnings[0].line, 4);
1022        assert_eq!(warnings[0].column, 1);
1023        // "[^1]:" is 5 chars
1024        assert_eq!(warnings[0].end_column, 6);
1025    }
1026
1027    #[test]
1028    fn test_orphaned_definition_in_blockquote_column() {
1029        // "> [^unused]: Never referenced."
1030        //    ^ column 3 (after "> ")
1031        let content = "> Some text.\n>\n> [^unused]: Never referenced.";
1032        let warnings = check_md066(content);
1033        assert_eq!(warnings.len(), 1);
1034        assert_eq!(warnings[0].line, 3);
1035        assert_eq!(warnings[0].column, 3, "Should point past blockquote prefix");
1036    }
1037
1038    #[test]
1039    fn test_orphaned_reference_after_multibyte_chars() {
1040        // "日本語テキスト[^ref1] has no def."
1041        // "日本語テキスト" = 7 characters (each is 3 bytes in UTF-8)
1042        // Column should be 8 (character-based), not 22 (byte-based)
1043        let content = "日本語テキスト[^ref1] has no def.";
1044        let warnings = check_md066(content);
1045        assert_eq!(warnings.len(), 1);
1046        assert_eq!(
1047            warnings[0].column, 8,
1048            "Column should be character-based, not byte-based"
1049        );
1050        // "[^ref1]" = 7 chars
1051        assert_eq!(warnings[0].end_column, 15);
1052    }
1053
1054    #[test]
1055    fn test_orphaned_definition_with_indentation_column() {
1056        // "   [^note]:" — column should point to [^note]:, not the leading spaces
1057        let content = "# Heading\n\n   [^note]: Indented and orphaned.";
1058        let warnings = check_md066(content);
1059        assert_eq!(warnings.len(), 1);
1060        // "[^note]:" starts at column 4 (after 3 spaces)
1061        assert_eq!(warnings[0].column, 4);
1062        // "[^note]:" is 8 chars, end_column = 4 + 8 = 12
1063        assert_eq!(warnings[0].end_column, 12);
1064    }
1065
1066    #[test]
1067    fn test_orphaned_ref_end_column_uses_original_case() {
1068        // ref_id is stored lowercased, but end_column should reflect the actual source text
1069        let content = "Text with [^NOTE] here.";
1070        let warnings = check_md066(content);
1071        assert_eq!(warnings.len(), 1);
1072        // "Text with " = 10 chars, so [^NOTE] starts at column 11
1073        assert_eq!(warnings[0].column, 11);
1074        // "[^NOTE]" = 7 chars, end_column = 11 + 7 = 18
1075        assert_eq!(warnings[0].end_column, 18);
1076    }
1077}