Skip to main content

rumdl_lib/rules/
md066_footnote_validation.rs

1use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use regex::Regex;
3use std::collections::{HashMap, HashSet};
4use std::sync::LazyLock;
5
6/// Pattern to match footnote definitions: [^id]: content
7/// Matches at start of line, with 0-3 leading spaces, caret in brackets
8/// Also handles definitions inside blockquotes (after stripping > prefixes)
9pub static FOOTNOTE_DEF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[ ]{0,3}\[\^([^\]]+)\]:").unwrap());
10
11/// Pattern to match footnote references in text: [^id]
12/// Callers must manually check that the match is NOT followed by `:` (which would make it a definition)
13pub static FOOTNOTE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^([^\]]+)\]").unwrap());
14
15/// Strip blockquote prefixes from a line to check for footnote definitions
16/// Handles nested blockquotes like `> > > ` and variations with/without spaces
17pub use crate::utils::blockquote::strip_blockquote_prefix;
18
19/// Find the (column, end_column) of a footnote definition marker `[^id]:` on a line.
20/// Returns 1-indexed column positions pointing to `[^id]:`, not leading whitespace.
21/// Handles blockquote prefixes and uses character counting for multi-byte support.
22pub fn footnote_def_position(line: &str) -> (usize, usize) {
23    let stripped = strip_blockquote_prefix(line);
24    if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(stripped) {
25        let prefix_chars = line.chars().count() - stripped.chars().count();
26        let id_match = caps.get(1).unwrap();
27        // `[^` is always 2 bytes before the ID capture group
28        let bracket_byte_pos = id_match.start() - 2;
29        let chars_before_bracket = stripped[..bracket_byte_pos].chars().count();
30        let full_match_end = caps.get(0).unwrap().end();
31        let marker_chars = stripped[bracket_byte_pos..full_match_end].chars().count();
32        (
33            prefix_chars + chars_before_bracket + 1,
34            prefix_chars + chars_before_bracket + marker_chars + 1,
35        )
36    } else {
37        (1, 1)
38    }
39}
40
/// Rule MD066: Footnote validation - ensure footnote references and definitions match up
///
/// This rule validates footnote usage in markdown documents:
/// - Detects orphaned footnote references (`[^1]`) without corresponding definitions
/// - Detects orphaned footnote definitions (`[^1]: text`) that are never referenced
/// - Detects duplicate definitions of the same footnote (every definition after the
///   first is reported)
///
/// Identifiers are compared case-insensitively, so `[^Note]` is satisfied by
/// `[^note]: ...`. Footnote-like text inside code blocks, code spans, front matter,
/// HTML comments, MDX comments and HTML blocks is ignored.
///
/// Footnote syntax (common markdown extension, not part of CommonMark):
/// - Reference: `[^identifier]` in text
/// - Definition: `[^identifier]: definition text` (can span multiple lines with indentation)
///
/// ## Examples
///
/// **Valid:**
/// ```markdown
/// This has a footnote[^1] that is properly defined.
///
/// [^1]: This is the footnote content.
/// ```
///
/// **Invalid - orphaned reference:**
/// ```markdown
/// This references[^missing] a footnote that doesn't exist.
/// ```
///
/// **Invalid - orphaned definition:**
/// ```markdown
/// [^unused]: This footnote is defined but never referenced.
/// ```
///
/// **Invalid - duplicate definition:**
/// ```markdown
/// Text[^1].
///
/// [^1]: First definition.
/// [^1]: Second definition of the same footnote.
/// ```
#[derive(Debug, Clone, Default)]
pub struct MD066FootnoteValidation;
71
72impl MD066FootnoteValidation {
73    pub fn new() -> Self {
74        Self
75    }
76}
77
78impl Rule for MD066FootnoteValidation {
79    fn name(&self) -> &'static str {
80        "MD066"
81    }
82
83    fn description(&self) -> &'static str {
84        "Footnote validation"
85    }
86
87    fn category(&self) -> RuleCategory {
88        RuleCategory::Other
89    }
90
91    fn fix_capability(&self) -> FixCapability {
92        FixCapability::Unfixable
93    }
94
95    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
96        ctx.content.is_empty() || !ctx.content.contains("[^")
97    }
98
99    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
100        let mut warnings = Vec::new();
101
102        // Early exit if no footnotes at all
103        if ctx.footnote_refs.is_empty() && !ctx.content.contains("[^") {
104            return Ok(warnings);
105        }
106
107        // Collect all footnote references (id is WITHOUT the ^ prefix)
108        // Map from id -> list of (line, byte_offset) for each reference
109        // Note: pulldown-cmark only finds references when definitions exist,
110        // so we need to parse references directly to find orphaned ones
111        let mut references: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
112
113        // First, use pulldown-cmark's detected references (when definitions exist)
114        for footnote_ref in &ctx.footnote_refs {
115            // Skip if in code block, frontmatter, HTML comment, or HTML block
116            if ctx.line_info(footnote_ref.line).is_some_and(|info| {
117                info.in_code_block
118                    || info.in_front_matter
119                    || info.in_html_comment
120                    || info.in_mdx_comment
121                    || info.in_html_block
122            }) {
123                continue;
124            }
125            references
126                .entry(footnote_ref.id.to_lowercase())
127                .or_default()
128                .push((footnote_ref.line, footnote_ref.byte_offset));
129        }
130
131        // Also parse references directly to find orphaned ones (without definitions)
132        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
133            // Skip if in code block, frontmatter, HTML comment, or HTML block
134            if line_info.in_code_block
135                || line_info.in_front_matter
136                || line_info.in_html_comment
137                || line_info.in_mdx_comment
138                || line_info.in_html_block
139            {
140                continue;
141            }
142
143            let line = line_info.content(ctx.content);
144            let line_num = line_idx + 1; // 1-indexed
145
146            for caps in FOOTNOTE_REF_PATTERN.captures_iter(line) {
147                if let Some(id_match) = caps.get(1) {
148                    // Skip if this is a footnote definition (at line start with 0-3 spaces indent)
149                    // Also handle blockquote prefixes (e.g., "> [^id]:")
150                    let full_match = caps.get(0).unwrap();
151                    if line.as_bytes().get(full_match.end()) == Some(&b':') {
152                        let before_match = &line[..full_match.start()];
153                        if before_match.chars().all(|c| c == ' ' || c == '>') {
154                            continue;
155                        }
156                    }
157
158                    let id = id_match.as_str().to_lowercase();
159
160                    // Check if this match is inside a code span
161                    let match_start = full_match.start();
162                    let byte_offset = line_info.byte_offset + match_start;
163
164                    let in_code_span = ctx.is_in_code_span_byte(byte_offset);
165
166                    if !in_code_span {
167                        // Only add if not already found (avoid duplicates with pulldown-cmark)
168                        references.entry(id).or_default().push((line_num, byte_offset));
169                    }
170                }
171            }
172        }
173
174        // Deduplicate references (pulldown-cmark and regex might find the same ones)
175        for occurrences in references.values_mut() {
176            occurrences.sort();
177            occurrences.dedup();
178        }
179
180        // Collect footnote definitions by parsing directly from content
181        // Footnote definitions: [^id]: content (NOT in reference_defs which expects URLs)
182        // Map from id (lowercase) -> list of (line, byte_offset) for duplicate detection
183        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
184        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
185            // Skip if in code block, frontmatter, HTML comment, or HTML block
186            if line_info.in_code_block
187                || line_info.in_front_matter
188                || line_info.in_html_comment
189                || line_info.in_mdx_comment
190                || line_info.in_html_block
191            {
192                continue;
193            }
194
195            let line = line_info.content(ctx.content);
196            // Strip blockquote prefixes to handle definitions inside blockquotes
197            let line_stripped = strip_blockquote_prefix(line);
198
199            if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(line_stripped)
200                && let Some(id_match) = caps.get(1)
201            {
202                let id = id_match.as_str().to_lowercase();
203                let line_num = line_idx + 1; // 1-indexed
204                definitions
205                    .entry(id)
206                    .or_default()
207                    .push((line_num, line_info.byte_offset));
208            }
209        }
210
211        // Check for duplicate definitions
212        for (def_id, occurrences) in &definitions {
213            if occurrences.len() > 1 {
214                // Report all duplicate definitions after the first one
215                for (line, _byte_offset) in &occurrences[1..] {
216                    let (col, end_col) = ctx
217                        .lines
218                        .get(*line - 1)
219                        .map(|li| footnote_def_position(li.content(ctx.content)))
220                        .unwrap_or((1, 1));
221                    warnings.push(LintWarning {
222                        rule_name: Some(self.name().to_string()),
223                        line: *line,
224                        column: col,
225                        end_line: *line,
226                        end_column: end_col,
227                        message: format!(
228                            "Duplicate footnote definition '[^{def_id}]' (first defined on line {})",
229                            occurrences[0].0
230                        ),
231                        severity: Severity::Error,
232                        fix: None,
233                    });
234                }
235            }
236        }
237
238        // Check for orphaned references (references without definitions)
239        let defined_ids: HashSet<&String> = definitions.keys().collect();
240        for (ref_id, occurrences) in &references {
241            if !defined_ids.contains(ref_id) {
242                // Report the first occurrence of each undefined reference
243                let (line, byte_offset) = occurrences[0];
244                // Compute character-based column from byte offset within the line.
245                // Find the actual marker text in the source to get the real length,
246                // since ref_id is lowercased and may differ from the original.
247                let (col, end_col) = if let Some(line_info) = ctx.lines.get(line - 1) {
248                    let line_content = line_info.content(ctx.content);
249                    let byte_pos = byte_offset.saturating_sub(line_info.byte_offset);
250                    let char_col = line_content.get(..byte_pos).map(|s| s.chars().count()).unwrap_or(0);
251                    // Find the actual [^...] marker in the source at this position
252                    let marker_chars = line_content
253                        .get(byte_pos..)
254                        .and_then(|rest| rest.find(']'))
255                        .map(|end| line_content[byte_pos..byte_pos + end + 1].chars().count())
256                        .unwrap_or_else(|| format!("[^{ref_id}]").chars().count());
257                    (char_col + 1, char_col + marker_chars + 1)
258                } else {
259                    (1, 1)
260                };
261                warnings.push(LintWarning {
262                    rule_name: Some(self.name().to_string()),
263                    line,
264                    column: col,
265                    end_line: line,
266                    end_column: end_col,
267                    message: format!("Footnote reference '[^{ref_id}]' has no corresponding definition"),
268                    severity: Severity::Error,
269                    fix: None,
270                });
271            }
272        }
273
274        // Check for orphaned definitions (definitions without references)
275        let referenced_ids: HashSet<&String> = references.keys().collect();
276        for (def_id, occurrences) in &definitions {
277            if !referenced_ids.contains(def_id) {
278                // Report the first definition location
279                let (line, _byte_offset) = occurrences[0];
280                let (col, end_col) = ctx
281                    .lines
282                    .get(line - 1)
283                    .map(|li| footnote_def_position(li.content(ctx.content)))
284                    .unwrap_or((1, 1));
285                warnings.push(LintWarning {
286                    rule_name: Some(self.name().to_string()),
287                    line,
288                    column: col,
289                    end_line: line,
290                    end_column: end_col,
291                    message: format!("Footnote definition '[^{def_id}]' is never referenced"),
292                    severity: Severity::Error,
293                    fix: None,
294                });
295            }
296        }
297
298        // Sort warnings by line number for consistent output
299        warnings.sort_by_key(|w| w.line);
300
301        Ok(warnings)
302    }
303
304    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
305        // No automatic fix - user must decide what to do with orphaned footnotes
306        Ok(ctx.content.to_string())
307    }
308
309    fn as_any(&self) -> &dyn std::any::Any {
310        self
311    }
312
313    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
314    where
315        Self: Sized,
316    {
317        Box::new(MD066FootnoteValidation)
318    }
319}
320
321#[cfg(test)]
322mod tests {
323    use super::*;
324    use crate::lint_context::LintContext;
325
326    fn check_md066(content: &str) -> Vec<LintWarning> {
327        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
328        MD066FootnoteValidation::new().check(&ctx).unwrap()
329    }
330
331    // ==================== Valid cases ====================
332
333    #[test]
334    fn test_valid_single_footnote() {
335        let content = "This has a footnote[^1].\n\n[^1]: The footnote content.";
336        let warnings = check_md066(content);
337        assert!(warnings.is_empty(), "Valid footnote should not warn: {warnings:?}");
338    }
339
340    #[test]
341    fn test_valid_multiple_footnotes() {
342        let content = r#"First footnote[^1] and second[^2].
343
344[^1]: First definition.
345[^2]: Second definition."#;
346        let warnings = check_md066(content);
347        assert!(warnings.is_empty(), "Valid footnotes should not warn: {warnings:?}");
348    }
349
350    #[test]
351    fn test_valid_named_footnotes() {
352        let content = r#"See the note[^note] and warning[^warning].
353
354[^note]: This is a note.
355[^warning]: This is a warning."#;
356        let warnings = check_md066(content);
357        assert!(warnings.is_empty(), "Named footnotes should not warn: {warnings:?}");
358    }
359
360    #[test]
361    fn test_valid_footnote_used_multiple_times() {
362        let content = r#"First[^1] and again[^1] and third[^1].
363
364[^1]: Used multiple times."#;
365        let warnings = check_md066(content);
366        assert!(warnings.is_empty(), "Reused footnote should not warn: {warnings:?}");
367    }
368
369    #[test]
370    fn test_valid_case_insensitive_matching() {
371        let content = r#"Reference[^NOTE].
372
373[^note]: Definition with different case."#;
374        let warnings = check_md066(content);
375        assert!(
376            warnings.is_empty(),
377            "Case-insensitive matching should work: {warnings:?}"
378        );
379    }
380
381    #[test]
382    fn test_no_footnotes_at_all() {
383        let content = "Just regular markdown without any footnotes.";
384        let warnings = check_md066(content);
385        assert!(warnings.is_empty(), "No footnotes should not warn");
386    }
387
388    // ==================== Orphaned references ====================
389
390    #[test]
391    fn test_orphaned_reference_single() {
392        let content = "This references[^missing] a non-existent footnote.";
393        let warnings = check_md066(content);
394        assert_eq!(warnings.len(), 1, "Should detect orphaned reference");
395        assert!(warnings[0].message.contains("missing"));
396        assert!(warnings[0].message.contains("no corresponding definition"));
397    }
398
399    #[test]
400    fn test_orphaned_reference_multiple() {
401        let content = r#"First[^a], second[^b], third[^c].
402
403[^b]: Only b is defined."#;
404        let warnings = check_md066(content);
405        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned references: {warnings:?}");
406        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
407        assert!(messages.iter().any(|m| m.contains("[^a]")));
408        assert!(messages.iter().any(|m| m.contains("[^c]")));
409    }
410
411    #[test]
412    fn test_orphaned_reference_reports_first_occurrence() {
413        let content = "First[^missing] and again[^missing] and third[^missing].";
414        let warnings = check_md066(content);
415        // Should only report once per unique ID
416        assert_eq!(warnings.len(), 1, "Should report each orphaned ID once");
417        assert!(warnings[0].message.contains("missing"));
418    }
419
420    // ==================== Orphaned definitions ====================
421
422    #[test]
423    fn test_orphaned_definition_single() {
424        let content = "Regular text.\n\n[^unused]: This is never referenced.";
425        let warnings = check_md066(content);
426        assert_eq!(warnings.len(), 1, "Should detect orphaned definition");
427        assert!(warnings[0].message.contains("unused"));
428        assert!(warnings[0].message.contains("never referenced"));
429    }
430
431    #[test]
432    fn test_orphaned_definition_multiple() {
433        let content = r#"Using one[^used].
434
435[^used]: This is used.
436[^orphan1]: Never used.
437[^orphan2]: Also never used."#;
438        let warnings = check_md066(content);
439        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned definitions: {warnings:?}");
440        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
441        assert!(messages.iter().any(|m| m.contains("orphan1")));
442        assert!(messages.iter().any(|m| m.contains("orphan2")));
443    }
444
445    // ==================== Mixed cases ====================
446
447    #[test]
448    fn test_both_orphaned_reference_and_definition() {
449        let content = r#"Reference[^missing].
450
451[^unused]: Never referenced."#;
452        let warnings = check_md066(content);
453        assert_eq!(
454            warnings.len(),
455            2,
456            "Should detect both orphaned ref and def: {warnings:?}"
457        );
458        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
459        assert!(
460            messages.iter().any(|m| m.contains("missing")),
461            "Should find missing ref"
462        );
463        assert!(messages.iter().any(|m| m.contains("unused")), "Should find unused def");
464    }
465
466    // ==================== Code block handling ====================
467
468    #[test]
469    fn test_footnote_in_code_block_ignored() {
470        let content = r#"```
471[^1]: This is in a code block
472```
473
474Regular text without footnotes."#;
475        let warnings = check_md066(content);
476        assert!(warnings.is_empty(), "Footnotes in code blocks should be ignored");
477    }
478
479    #[test]
480    fn test_footnote_reference_in_code_span_ignored() {
481        // Note: This depends on whether pulldown-cmark parses footnotes inside code spans
482        // If it does, we should skip them
483        let content = r#"Use `[^1]` syntax for footnotes.
484
485[^1]: This definition exists but the reference in backticks shouldn't count."#;
486        // This is tricky - if pulldown-cmark doesn't parse [^1] in backticks as a footnote ref,
487        // then the definition is orphaned
488        let warnings = check_md066(content);
489        // Expectation depends on parser behavior - test the actual behavior
490        assert_eq!(
491            warnings.len(),
492            1,
493            "Code span reference shouldn't count, definition is orphaned"
494        );
495        assert!(warnings[0].message.contains("never referenced"));
496    }
497
498    // ==================== Frontmatter handling ====================
499
500    #[test]
501    fn test_footnote_in_frontmatter_ignored() {
502        let content = r#"---
503note: "[^1]: yaml value"
504---
505
506Regular content."#;
507        let warnings = check_md066(content);
508        assert!(
509            warnings.is_empty(),
510            "Footnotes in frontmatter should be ignored: {warnings:?}"
511        );
512    }
513
514    // ==================== Edge cases ====================
515
516    #[test]
517    fn test_empty_document() {
518        let warnings = check_md066("");
519        assert!(warnings.is_empty());
520    }
521
522    #[test]
523    fn test_footnote_with_special_characters() {
524        let content = r#"Reference[^my-note_1].
525
526[^my-note_1]: Definition with special chars in ID."#;
527        let warnings = check_md066(content);
528        assert!(
529            warnings.is_empty(),
530            "Special characters in footnote ID should work: {warnings:?}"
531        );
532    }
533
534    #[test]
535    fn test_multiline_footnote_definition() {
536        let content = r#"Reference[^long].
537
538[^long]: This is a long footnote
539    that spans multiple lines
540    with proper indentation."#;
541        let warnings = check_md066(content);
542        assert!(
543            warnings.is_empty(),
544            "Multiline footnote definitions should work: {warnings:?}"
545        );
546    }
547
548    #[test]
549    fn test_footnote_at_end_of_sentence() {
550        let content = r#"This ends with a footnote[^1].
551
552[^1]: End of sentence footnote."#;
553        let warnings = check_md066(content);
554        assert!(warnings.is_empty());
555    }
556
557    #[test]
558    fn test_footnote_mid_sentence() {
559        let content = r#"Some text[^1] continues here.
560
561[^1]: Mid-sentence footnote."#;
562        let warnings = check_md066(content);
563        assert!(warnings.is_empty());
564    }
565
566    #[test]
567    fn test_adjacent_footnotes() {
568        let content = r#"Text[^1][^2] with adjacent footnotes.
569
570[^1]: First.
571[^2]: Second."#;
572        let warnings = check_md066(content);
573        assert!(warnings.is_empty(), "Adjacent footnotes should work: {warnings:?}");
574    }
575
576    #[test]
577    fn test_footnote_only_definitions_no_references() {
578        let content = r#"[^1]: First orphan.
579[^2]: Second orphan.
580[^3]: Third orphan."#;
581        let warnings = check_md066(content);
582        assert_eq!(warnings.len(), 3, "All definitions should be flagged: {warnings:?}");
583    }
584
585    #[test]
586    fn test_footnote_only_references_no_definitions() {
587        let content = "Text[^1] and[^2] and[^3].";
588        let warnings = check_md066(content);
589        assert_eq!(warnings.len(), 3, "All references should be flagged: {warnings:?}");
590    }
591
592    // ==================== Blockquote handling ====================
593
594    #[test]
595    fn test_footnote_in_blockquote_valid() {
596        let content = r#"> This has a footnote[^1].
597>
598> [^1]: Definition inside blockquote."#;
599        let warnings = check_md066(content);
600        assert!(
601            warnings.is_empty(),
602            "Footnotes inside blockquotes should be validated: {warnings:?}"
603        );
604    }
605
606    #[test]
607    fn test_footnote_in_nested_blockquote() {
608        let content = r#"> > Nested blockquote with footnote[^nested].
609> >
610> > [^nested]: Definition in nested blockquote."#;
611        let warnings = check_md066(content);
612        assert!(
613            warnings.is_empty(),
614            "Footnotes in nested blockquotes should work: {warnings:?}"
615        );
616    }
617
618    #[test]
619    fn test_footnote_blockquote_orphaned_reference() {
620        let content = r#"> This has an orphaned footnote[^missing].
621>
622> No definition here."#;
623        let warnings = check_md066(content);
624        assert_eq!(warnings.len(), 1, "Should detect orphaned ref in blockquote");
625        assert!(warnings[0].message.contains("missing"));
626    }
627
628    #[test]
629    fn test_footnote_blockquote_orphaned_definition() {
630        let content = r#"> Some text.
631>
632> [^unused]: Never referenced in blockquote."#;
633        let warnings = check_md066(content);
634        assert_eq!(warnings.len(), 1, "Should detect orphaned def in blockquote");
635        assert!(warnings[0].message.contains("unused"));
636    }
637
638    // ==================== Duplicate definitions ====================
639
640    #[test]
641    fn test_duplicate_definition_detected() {
642        let content = r#"Reference[^1].
643
644[^1]: First definition.
645[^1]: Second definition (duplicate)."#;
646        let warnings = check_md066(content);
647        assert_eq!(warnings.len(), 1, "Should detect duplicate definition: {warnings:?}");
648        assert!(warnings[0].message.contains("Duplicate"));
649        assert!(warnings[0].message.contains("[^1]"));
650    }
651
652    #[test]
653    fn test_multiple_duplicate_definitions() {
654        let content = r#"Reference[^dup].
655
656[^dup]: First.
657[^dup]: Second.
658[^dup]: Third."#;
659        let warnings = check_md066(content);
660        assert_eq!(warnings.len(), 2, "Should detect 2 duplicate definitions: {warnings:?}");
661        assert!(warnings.iter().all(|w| w.message.contains("Duplicate")));
662    }
663
664    #[test]
665    fn test_duplicate_definition_case_insensitive() {
666        let content = r#"Reference[^Note].
667
668[^note]: Lowercase definition.
669[^NOTE]: Uppercase definition (duplicate)."#;
670        let warnings = check_md066(content);
671        assert_eq!(warnings.len(), 1, "Case-insensitive duplicate detection: {warnings:?}");
672        assert!(warnings[0].message.contains("Duplicate"));
673    }
674
675    // ==================== HTML comment handling ====================
676
677    #[test]
678    fn test_footnote_reference_in_html_comment_ignored() {
679        let content = r#"<!-- This has [^1] in a comment -->
680
681Regular text without footnotes."#;
682        let warnings = check_md066(content);
683        assert!(
684            warnings.is_empty(),
685            "Footnote refs in HTML comments should be ignored: {warnings:?}"
686        );
687    }
688
689    #[test]
690    fn test_footnote_definition_in_html_comment_ignored() {
691        let content = r#"<!--
692[^1]: Definition in HTML comment
693-->
694
695Regular text."#;
696        let warnings = check_md066(content);
697        assert!(
698            warnings.is_empty(),
699            "Footnote defs in HTML comments should be ignored: {warnings:?}"
700        );
701    }
702
703    #[test]
704    fn test_footnote_outside_html_comment_still_validated() {
705        let content = r#"<!-- Just a comment -->
706
707Text with footnote[^1].
708
709[^1]: Valid definition outside comment."#;
710        let warnings = check_md066(content);
711        assert!(warnings.is_empty(), "Valid footnote outside comment: {warnings:?}");
712    }
713
714    #[test]
715    fn test_orphaned_ref_not_saved_by_def_in_comment() {
716        let content = r#"Text with orphaned[^missing].
717
718<!--
719[^missing]: This definition is in a comment, shouldn't count
720-->"#;
721        let warnings = check_md066(content);
722        assert_eq!(warnings.len(), 1, "Def in comment shouldn't satisfy ref: {warnings:?}");
723        assert!(warnings[0].message.contains("no corresponding definition"));
724    }
725
726    // ==================== HTML block handling ====================
727
728    #[test]
729    fn test_footnote_in_html_block_ignored() {
730        // Regex character classes like [^abc] should be ignored in HTML blocks
731        let content = r#"<table>
732<tr>
733<td><code>[^abc]</code></td>
734<td>Negated character class</td>
735</tr>
736</table>
737
738Regular markdown text."#;
739        let warnings = check_md066(content);
740        assert!(
741            warnings.is_empty(),
742            "Footnote-like patterns in HTML blocks should be ignored: {warnings:?}"
743        );
744    }
745
746    #[test]
747    fn test_footnote_in_html_table_ignored() {
748        let content = r#"| Header |
749|--------|
750| Cell   |
751
752<div>
753<p>This has <code>[^0-9]</code> regex pattern</p>
754</div>
755
756Normal text."#;
757        let warnings = check_md066(content);
758        assert!(
759            warnings.is_empty(),
760            "Regex patterns in HTML div should be ignored: {warnings:?}"
761        );
762    }
763
764    #[test]
765    fn test_real_footnote_outside_html_block() {
766        let content = r#"<div>
767Some HTML content
768</div>
769
770Text with real footnote[^1].
771
772[^1]: This is a real footnote definition."#;
773        let warnings = check_md066(content);
774        assert!(
775            warnings.is_empty(),
776            "Real footnote outside HTML block should work: {warnings:?}"
777        );
778    }
779
780    // ==================== Combined edge cases ====================
781
782    #[test]
783    fn test_blockquote_with_duplicate_definitions() {
784        let content = r#"> Text[^1].
785>
786> [^1]: First.
787> [^1]: Duplicate in blockquote."#;
788        let warnings = check_md066(content);
789        assert_eq!(warnings.len(), 1, "Should detect duplicate in blockquote: {warnings:?}");
790        assert!(warnings[0].message.contains("Duplicate"));
791    }
792
793    #[test]
794    fn test_all_enhancement_features_together() {
795        let content = r#"<!-- Comment with [^comment] -->
796
797Regular text[^valid] and[^missing].
798
799> Blockquote text[^bq].
800>
801> [^bq]: Blockquote definition.
802
803[^valid]: Valid definition.
804[^valid]: Duplicate definition.
805[^unused]: Never referenced."#;
806        let warnings = check_md066(content);
807        // Should find:
808        // 1. [^missing] - orphaned reference
809        // 2. [^valid] duplicate definition
810        // 3. [^unused] - orphaned definition
811        assert_eq!(warnings.len(), 3, "Should find all issues: {warnings:?}");
812
813        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
814        assert!(
815            messages.iter().any(|m| m.contains("missing")),
816            "Should find orphaned ref"
817        );
818        assert!(
819            messages.iter().any(|m| m.contains("Duplicate")),
820            "Should find duplicate"
821        );
822        assert!(
823            messages.iter().any(|m| m.contains("unused")),
824            "Should find orphaned def"
825        );
826    }
827
828    #[test]
829    fn test_footnote_ref_at_end_of_file_no_newline() {
830        let content = "[^1]: Definition here.\n\nText with[^1]";
831        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
832        let rule = MD066FootnoteValidation;
833        let result = rule.check(&ctx).unwrap();
834        assert!(
835            result.is_empty(),
836            "Valid footnote pair without trailing newline should not warn: {result:?}"
837        );
838    }
839
840    #[test]
841    fn test_orphaned_footnote_ref_at_eof_no_newline() {
842        let content = "Text with[^missing]";
843        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
844        let rule = MD066FootnoteValidation;
845        let result = rule.check(&ctx).unwrap();
846        assert!(
847            !result.is_empty(),
848            "Orphaned ref at EOF without newline should warn: {result:?}"
849        );
850    }
851
852    #[test]
853    fn test_midline_footnote_ref_with_colon_detected_as_reference() {
854        // [^note]: mid-line is a reference followed by colon, NOT a definition
855        let content = "# Test\n\nI think [^note]: this is relevant.\n";
856        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
857        let rule = MD066FootnoteValidation;
858        let result = rule.check(&ctx).unwrap();
859        assert_eq!(
860            result.len(),
861            1,
862            "Mid-line [^note]: should be detected as undefined reference: {result:?}"
863        );
864        assert!(
865            result[0].message.contains("no corresponding definition"),
866            "Should warn about missing definition: {}",
867            result[0].message
868        );
869    }
870
871    #[test]
872    fn test_midline_footnote_ref_with_colon_matched_to_definition() {
873        // [^note]: mid-line is a reference; [^note]: at line start is the definition
874        let content = "# Test\n\nI think [^note]: this is relevant.\n\n[^note]: The actual definition.\n";
875        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
876        let rule = MD066FootnoteValidation;
877        let result = rule.check(&ctx).unwrap();
878        assert!(
879            result.is_empty(),
880            "Mid-line ref should match line-start definition: {result:?}"
881        );
882    }
883
884    #[test]
885    fn test_linestart_footnote_def_still_skipped_as_reference() {
886        // [^note]: at line start IS a definition and should NOT be counted as reference
887        let content = "# Test\n\n[^note]: The definition.\n";
888        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
889        let rule = MD066FootnoteValidation;
890        let result = rule.check(&ctx).unwrap();
891        // Should warn about orphaned definition (no reference)
892        assert_eq!(result.len(), 1, "Orphaned def should be flagged: {result:?}");
893        assert!(
894            result[0].message.contains("never referenced"),
895            "Should say 'never referenced': {}",
896            result[0].message
897        );
898    }
899
900    #[test]
901    fn test_indented_footnote_def_still_skipped() {
902        // [^note]: with 1-3 spaces indent is still a definition
903        let content = "# Test\n\n   [^note]: Indented definition.\n";
904        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
905        let rule = MD066FootnoteValidation;
906        let result = rule.check(&ctx).unwrap();
907        // Should be treated as an orphaned definition (no reference)
908        assert_eq!(result.len(), 1, "Indented def should still be detected: {result:?}");
909        assert!(
910            result[0].message.contains("never referenced"),
911            "Should say 'never referenced': {}",
912            result[0].message
913        );
914    }
915
916    #[test]
917    fn test_multiple_midline_refs_with_colons_on_same_line() {
918        // Both [^a]: and [^b]: mid-line should be counted as references
919        let content = "# Test\n\nText [^a]: and [^b]: more text.\n\n[^a]: Def A.\n[^b]: Def B.\n";
920        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
921        let rule = MD066FootnoteValidation;
922        let result = rule.check(&ctx).unwrap();
923        assert!(
924            result.is_empty(),
925            "Both mid-line refs should match their definitions: {result:?}"
926        );
927    }
928
929    #[test]
930    fn test_blockquote_footnote_def_still_skipped() {
931        // > [^note]: inside blockquote is a definition, not a reference
932        let content = "# Test\n\n> [^note]: Definition in blockquote.\n";
933        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
934        let rule = MD066FootnoteValidation;
935        let result = rule.check(&ctx).unwrap();
936        // Orphaned definition (no reference uses it)
937        assert_eq!(
938            result.len(),
939            1,
940            "Blockquote def should be detected as orphaned: {result:?}"
941        );
942        assert!(
943            result[0].message.contains("never referenced"),
944            "Should say 'never referenced': {}",
945            result[0].message
946        );
947    }
948
949    #[test]
950    fn test_list_item_footnote_ref_with_colon_is_reference() {
951        // - [^note]: inside a list item is a reference, not a definition
952        let content = "# Test\n\n- [^note]: list item text.\n\n[^note]: The actual definition.\n";
953        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
954        let rule = MD066FootnoteValidation;
955        let result = rule.check(&ctx).unwrap();
956        assert!(
957            result.is_empty(),
958            "List item [^note]: should be a ref matching the definition: {result:?}"
959        );
960    }
961
962    // ==================== Warning position tests ====================
963
964    #[test]
965    fn test_orphaned_reference_column_position() {
966        // "This references[^missing] a non-existent footnote."
967        //  column 16:     ^
968        let content = "This references[^missing] a non-existent footnote.";
969        let warnings = check_md066(content);
970        assert_eq!(warnings.len(), 1);
971        assert_eq!(warnings[0].line, 1);
972        assert_eq!(warnings[0].column, 16, "Column should point to '[^missing]'");
973        // "[^missing]" is 10 chars, so end_column = 16 + 10 = 26
974        assert_eq!(warnings[0].end_column, 26);
975    }
976
977    #[test]
978    fn test_orphaned_definition_column_position() {
979        // "[^unused]: Never referenced." starts at column 1
980        let content = "Regular text.\n\n[^unused]: Never referenced.";
981        let warnings = check_md066(content);
982        assert_eq!(warnings.len(), 1);
983        assert_eq!(warnings[0].line, 3);
984        assert_eq!(warnings[0].column, 1, "Definition at start of line");
985        // "[^unused]:" is 10 chars
986        assert_eq!(warnings[0].end_column, 11);
987    }
988
989    #[test]
990    fn test_duplicate_definition_column_position() {
991        let content = "Reference[^1].\n\n[^1]: First.\n[^1]: Second.";
992        let warnings = check_md066(content);
993        assert_eq!(warnings.len(), 1);
994        assert_eq!(warnings[0].line, 4);
995        assert_eq!(warnings[0].column, 1);
996        // "[^1]:" is 5 chars
997        assert_eq!(warnings[0].end_column, 6);
998    }
999
1000    #[test]
1001    fn test_orphaned_definition_in_blockquote_column() {
1002        // "> [^unused]: Never referenced."
1003        //    ^ column 3 (after "> ")
1004        let content = "> Some text.\n>\n> [^unused]: Never referenced.";
1005        let warnings = check_md066(content);
1006        assert_eq!(warnings.len(), 1);
1007        assert_eq!(warnings[0].line, 3);
1008        assert_eq!(warnings[0].column, 3, "Should point past blockquote prefix");
1009    }
1010
1011    #[test]
1012    fn test_orphaned_reference_after_multibyte_chars() {
1013        // "日本語テキスト[^ref1] has no def."
1014        // "日本語テキスト" = 7 characters (each is 3 bytes in UTF-8)
1015        // Column should be 8 (character-based), not 22 (byte-based)
1016        let content = "日本語テキスト[^ref1] has no def.";
1017        let warnings = check_md066(content);
1018        assert_eq!(warnings.len(), 1);
1019        assert_eq!(
1020            warnings[0].column, 8,
1021            "Column should be character-based, not byte-based"
1022        );
1023        // "[^ref1]" = 7 chars
1024        assert_eq!(warnings[0].end_column, 15);
1025    }
1026
1027    #[test]
1028    fn test_orphaned_definition_with_indentation_column() {
1029        // "   [^note]:" — column should point to [^note]:, not the leading spaces
1030        let content = "# Heading\n\n   [^note]: Indented and orphaned.";
1031        let warnings = check_md066(content);
1032        assert_eq!(warnings.len(), 1);
1033        // "[^note]:" starts at column 4 (after 3 spaces)
1034        assert_eq!(warnings[0].column, 4);
1035        // "[^note]:" is 8 chars, end_column = 4 + 8 = 12
1036        assert_eq!(warnings[0].end_column, 12);
1037    }
1038
1039    #[test]
1040    fn test_orphaned_ref_end_column_uses_original_case() {
1041        // ref_id is stored lowercased, but end_column should reflect the actual source text
1042        let content = "Text with [^NOTE] here.";
1043        let warnings = check_md066(content);
1044        assert_eq!(warnings.len(), 1);
1045        // "Text with " = 10 chars, so [^NOTE] starts at column 11
1046        assert_eq!(warnings[0].column, 11);
1047        // "[^NOTE]" = 7 chars, end_column = 11 + 7 = 18
1048        assert_eq!(warnings[0].end_column, 18);
1049    }
1050}