Skip to main content

rumdl_lib/rules/
md066_footnote_validation.rs

1use crate::rule::{FixCapability, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
2use regex::Regex;
3use std::collections::{HashMap, HashSet};
4use std::sync::LazyLock;
5
6/// Pattern to match footnote definitions: [^id]: content
7/// Matches at start of line, with 0-3 leading spaces, caret in brackets
8/// Also handles definitions inside blockquotes (after stripping > prefixes)
9pub(super) static FOOTNOTE_DEF_PATTERN: LazyLock<Regex> =
10    LazyLock::new(|| Regex::new(r"^[ ]{0,3}\[\^([^\]]+)\]:").unwrap());
11
12/// Pattern to match footnote references in text: [^id]
13/// Callers must manually check that the match is NOT followed by `:` (which would make it a definition)
14pub(super) static FOOTNOTE_REF_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\[\^([^\]]+)\]").unwrap());
15
16/// Strip blockquote prefixes from a line to check for footnote definitions
17/// Handles nested blockquotes like `> > > ` and variations with/without spaces
18pub(super) use crate::utils::blockquote::strip_blockquote_prefix;
19
20/// Find the (column, end_column) of a footnote definition marker `[^id]:` on a line.
21/// Returns 1-indexed column positions pointing to `[^id]:`, not leading whitespace.
22/// Handles blockquote prefixes and uses character counting for multi-byte support.
23pub(super) fn footnote_def_position(line: &str) -> (usize, usize) {
24    let stripped = strip_blockquote_prefix(line);
25    if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(stripped) {
26        let prefix_chars = line.chars().count() - stripped.chars().count();
27        let id_match = caps.get(1).unwrap();
28        // `[^` is always 2 bytes before the ID capture group
29        let bracket_byte_pos = id_match.start() - 2;
30        let chars_before_bracket = stripped[..bracket_byte_pos].chars().count();
31        let full_match_end = caps.get(0).unwrap().end();
32        let marker_chars = stripped[bracket_byte_pos..full_match_end].chars().count();
33        (
34            prefix_chars + chars_before_bracket + 1,
35            prefix_chars + chars_before_bracket + marker_chars + 1,
36        )
37    } else {
38        (1, 1)
39    }
40}
41
/// Rule MD066: footnote validation.
///
/// Cross-checks footnote references against footnote definitions in a markdown
/// document and reports:
/// - references (`[^1]`) that have no matching definition,
/// - definitions (`[^1]: text`) that are never referenced,
/// - definitions that repeat an identifier that was already defined.
///
/// Identifiers are compared case-insensitively, so `[^NOTE]` is satisfied by
/// `[^note]: ...`.
///
/// Footnote syntax (a common markdown extension, not part of CommonMark):
/// - Reference: `[^identifier]` in running text
/// - Definition: `[^identifier]: definition text` (may continue on indented lines)
///
/// ## Examples
///
/// **Valid:**
/// ```markdown
/// This has a footnote[^1] that is properly defined.
///
/// [^1]: This is the footnote content.
/// ```
///
/// **Invalid - orphaned reference:**
/// ```markdown
/// This references[^missing] a footnote that doesn't exist.
/// ```
///
/// **Invalid - orphaned definition:**
/// ```markdown
/// [^unused]: This footnote is defined but never referenced.
/// ```
#[derive(Debug, Clone, Default)]
pub struct MD066FootnoteValidation;

impl MD066FootnoteValidation {
    /// Creates the rule. It is a unit struct, so there is nothing to configure.
    pub fn new() -> Self {
        Self::default()
    }
}
78
79impl Rule for MD066FootnoteValidation {
80    fn name(&self) -> &'static str {
81        "MD066"
82    }
83
84    fn description(&self) -> &'static str {
85        "Footnote validation"
86    }
87
88    fn category(&self) -> RuleCategory {
89        RuleCategory::Other
90    }
91
92    fn fix_capability(&self) -> FixCapability {
93        FixCapability::Unfixable
94    }
95
96    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
97        ctx.content.is_empty() || !ctx.content.contains("[^")
98    }
99
100    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
101        let mut warnings = Vec::new();
102
103        // Early exit if no footnotes at all
104        if ctx.footnote_refs.is_empty() && !ctx.content.contains("[^") {
105            return Ok(warnings);
106        }
107
108        // Collect all footnote references (id is WITHOUT the ^ prefix)
109        // Map from id -> list of (line, byte_offset) for each reference
110        // Note: pulldown-cmark only finds references when definitions exist,
111        // so we need to parse references directly to find orphaned ones
112        let mut references: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
113
114        // First, use pulldown-cmark's detected references (when definitions exist)
115        for footnote_ref in &ctx.footnote_refs {
116            // Skip if in code block, frontmatter, HTML comment, or HTML block
117            if ctx.line_info(footnote_ref.line).is_some_and(|info| {
118                info.in_code_block
119                    || info.in_front_matter
120                    || info.in_html_comment
121                    || info.in_mdx_comment
122                    || info.in_html_block
123            }) {
124                continue;
125            }
126            references
127                .entry(footnote_ref.id.to_lowercase())
128                .or_default()
129                .push((footnote_ref.line, footnote_ref.byte_offset));
130        }
131
132        // Also parse references directly to find orphaned ones (without definitions)
133        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
134            // Skip if in code block, frontmatter, HTML comment, or HTML block
135            if line_info.in_code_block
136                || line_info.in_front_matter
137                || line_info.in_html_comment
138                || line_info.in_mdx_comment
139                || line_info.in_html_block
140            {
141                continue;
142            }
143
144            let line = line_info.content(ctx.content);
145            let line_num = line_idx + 1; // 1-indexed
146
147            for caps in FOOTNOTE_REF_PATTERN.captures_iter(line) {
148                if let Some(id_match) = caps.get(1) {
149                    // Skip if this is a footnote definition (at line start with 0-3 spaces indent)
150                    // Also handle blockquote prefixes (e.g., "> [^id]:")
151                    let full_match = caps.get(0).unwrap();
152                    if line.as_bytes().get(full_match.end()) == Some(&b':') {
153                        let before_match = &line[..full_match.start()];
154                        if before_match.chars().all(|c| c == ' ' || c == '>') {
155                            continue;
156                        }
157                    }
158
159                    let id = id_match.as_str().to_lowercase();
160
161                    // Check if this match is inside a code span
162                    let match_start = full_match.start();
163                    let byte_offset = line_info.byte_offset + match_start;
164
165                    let in_code_span = ctx.is_in_code_span_byte(byte_offset);
166
167                    if !in_code_span {
168                        // Only add if not already found (avoid duplicates with pulldown-cmark)
169                        references.entry(id).or_default().push((line_num, byte_offset));
170                    }
171                }
172            }
173        }
174
175        // Deduplicate references (pulldown-cmark and regex might find the same ones)
176        for occurrences in references.values_mut() {
177            occurrences.sort_unstable();
178            occurrences.dedup();
179        }
180
181        // Collect footnote definitions by parsing directly from content
182        // Footnote definitions: [^id]: content (NOT in reference_defs which expects URLs)
183        // Map from id (lowercase) -> list of (line, byte_offset) for duplicate detection
184        let mut definitions: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
185        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
186            // Skip if in code block, frontmatter, HTML comment, or HTML block
187            if line_info.in_code_block
188                || line_info.in_front_matter
189                || line_info.in_html_comment
190                || line_info.in_mdx_comment
191                || line_info.in_html_block
192            {
193                continue;
194            }
195
196            let line = line_info.content(ctx.content);
197            // Strip blockquote prefixes to handle definitions inside blockquotes
198            let line_stripped = strip_blockquote_prefix(line);
199
200            if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(line_stripped)
201                && let Some(id_match) = caps.get(1)
202            {
203                let id = id_match.as_str().to_lowercase();
204                let line_num = line_idx + 1; // 1-indexed
205                definitions
206                    .entry(id)
207                    .or_default()
208                    .push((line_num, line_info.byte_offset));
209            }
210        }
211
212        // Check for duplicate definitions
213        for (def_id, occurrences) in &definitions {
214            if occurrences.len() > 1 {
215                // Report all duplicate definitions after the first one
216                for (line, _byte_offset) in &occurrences[1..] {
217                    let (col, end_col) = ctx
218                        .lines
219                        .get(*line - 1)
220                        .map_or((1, 1), |li| footnote_def_position(li.content(ctx.content)));
221                    warnings.push(LintWarning {
222                        rule_name: Some(self.name().to_string()),
223                        line: *line,
224                        column: col,
225                        end_line: *line,
226                        end_column: end_col,
227                        message: format!(
228                            "Duplicate footnote definition '[^{def_id}]' (first defined on line {})",
229                            occurrences[0].0
230                        ),
231                        severity: Severity::Error,
232                        fix: None,
233                    });
234                }
235            }
236        }
237
238        // Check for orphaned references (references without definitions)
239        let defined_ids: HashSet<&String> = definitions.keys().collect();
240        for (ref_id, occurrences) in &references {
241            if !defined_ids.contains(ref_id) {
242                // Report the first occurrence of each undefined reference
243                let (line, byte_offset) = occurrences[0];
244                // Compute character-based column from byte offset within the line.
245                // Find the actual marker text in the source to get the real length,
246                // since ref_id is lowercased and may differ from the original.
247                let (col, end_col) = if let Some(line_info) = ctx.lines.get(line - 1) {
248                    let line_content = line_info.content(ctx.content);
249                    let byte_pos = byte_offset.saturating_sub(line_info.byte_offset);
250                    let char_col = line_content.get(..byte_pos).map_or(0, |s| s.chars().count());
251                    // Find the actual [^...] marker in the source at this position
252                    let marker_chars = line_content
253                        .get(byte_pos..)
254                        .and_then(|rest| rest.find(']'))
255                        .map_or_else(
256                            || format!("[^{ref_id}]").chars().count(),
257                            |end| line_content[byte_pos..=(byte_pos + end)].chars().count(),
258                        );
259                    (char_col + 1, char_col + marker_chars + 1)
260                } else {
261                    (1, 1)
262                };
263                warnings.push(LintWarning {
264                    rule_name: Some(self.name().to_string()),
265                    line,
266                    column: col,
267                    end_line: line,
268                    end_column: end_col,
269                    message: format!("Footnote reference '[^{ref_id}]' has no corresponding definition"),
270                    severity: Severity::Error,
271                    fix: None,
272                });
273            }
274        }
275
276        // Check for orphaned definitions (definitions without references)
277        let referenced_ids: HashSet<&String> = references.keys().collect();
278        for (def_id, occurrences) in &definitions {
279            if !referenced_ids.contains(def_id) {
280                // Report the first definition location
281                let (line, _byte_offset) = occurrences[0];
282                let (col, end_col) = ctx
283                    .lines
284                    .get(line - 1)
285                    .map_or((1, 1), |li| footnote_def_position(li.content(ctx.content)));
286                warnings.push(LintWarning {
287                    rule_name: Some(self.name().to_string()),
288                    line,
289                    column: col,
290                    end_line: line,
291                    end_column: end_col,
292                    message: format!("Footnote definition '[^{def_id}]' is never referenced"),
293                    severity: Severity::Error,
294                    fix: None,
295                });
296            }
297        }
298
299        // Sort warnings by line number for consistent output
300        warnings.sort_by_key(|w| w.line);
301
302        Ok(warnings)
303    }
304
305    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
306        // No automatic fix - user must decide what to do with orphaned footnotes
307        Ok(ctx.content.to_string())
308    }
309
310    fn as_any(&self) -> &dyn std::any::Any {
311        self
312    }
313
314    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
315    where
316        Self: Sized,
317    {
318        Box::new(MD066FootnoteValidation)
319    }
320}
321
322#[cfg(test)]
323mod tests {
324    use super::*;
325    use crate::lint_context::LintContext;
326
327    fn check_md066(content: &str) -> Vec<LintWarning> {
328        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
329        MD066FootnoteValidation::new().check(&ctx).unwrap()
330    }
331
332    // ==================== Valid cases ====================
333
334    #[test]
335    fn test_valid_single_footnote() {
336        let content = "This has a footnote[^1].\n\n[^1]: The footnote content.";
337        let warnings = check_md066(content);
338        assert!(warnings.is_empty(), "Valid footnote should not warn: {warnings:?}");
339    }
340
341    #[test]
342    fn test_valid_multiple_footnotes() {
343        let content = r#"First footnote[^1] and second[^2].
344
345[^1]: First definition.
346[^2]: Second definition."#;
347        let warnings = check_md066(content);
348        assert!(warnings.is_empty(), "Valid footnotes should not warn: {warnings:?}");
349    }
350
351    #[test]
352    fn test_valid_named_footnotes() {
353        let content = r#"See the note[^note] and warning[^warning].
354
355[^note]: This is a note.
356[^warning]: This is a warning."#;
357        let warnings = check_md066(content);
358        assert!(warnings.is_empty(), "Named footnotes should not warn: {warnings:?}");
359    }
360
361    #[test]
362    fn test_valid_footnote_used_multiple_times() {
363        let content = r#"First[^1] and again[^1] and third[^1].
364
365[^1]: Used multiple times."#;
366        let warnings = check_md066(content);
367        assert!(warnings.is_empty(), "Reused footnote should not warn: {warnings:?}");
368    }
369
370    #[test]
371    fn test_valid_case_insensitive_matching() {
372        let content = r#"Reference[^NOTE].
373
374[^note]: Definition with different case."#;
375        let warnings = check_md066(content);
376        assert!(
377            warnings.is_empty(),
378            "Case-insensitive matching should work: {warnings:?}"
379        );
380    }
381
382    #[test]
383    fn test_no_footnotes_at_all() {
384        let content = "Just regular markdown without any footnotes.";
385        let warnings = check_md066(content);
386        assert!(warnings.is_empty(), "No footnotes should not warn");
387    }
388
389    // ==================== Orphaned references ====================
390
391    #[test]
392    fn test_orphaned_reference_single() {
393        let content = "This references[^missing] a non-existent footnote.";
394        let warnings = check_md066(content);
395        assert_eq!(warnings.len(), 1, "Should detect orphaned reference");
396        assert!(warnings[0].message.contains("missing"));
397        assert!(warnings[0].message.contains("no corresponding definition"));
398    }
399
400    #[test]
401    fn test_orphaned_reference_multiple() {
402        let content = r#"First[^a], second[^b], third[^c].
403
404[^b]: Only b is defined."#;
405        let warnings = check_md066(content);
406        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned references: {warnings:?}");
407        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
408        assert!(messages.iter().any(|m| m.contains("[^a]")));
409        assert!(messages.iter().any(|m| m.contains("[^c]")));
410    }
411
412    #[test]
413    fn test_orphaned_reference_reports_first_occurrence() {
414        let content = "First[^missing] and again[^missing] and third[^missing].";
415        let warnings = check_md066(content);
416        // Should only report once per unique ID
417        assert_eq!(warnings.len(), 1, "Should report each orphaned ID once");
418        assert!(warnings[0].message.contains("missing"));
419    }
420
421    // ==================== Orphaned definitions ====================
422
423    #[test]
424    fn test_orphaned_definition_single() {
425        let content = "Regular text.\n\n[^unused]: This is never referenced.";
426        let warnings = check_md066(content);
427        assert_eq!(warnings.len(), 1, "Should detect orphaned definition");
428        assert!(warnings[0].message.contains("unused"));
429        assert!(warnings[0].message.contains("never referenced"));
430    }
431
432    #[test]
433    fn test_orphaned_definition_multiple() {
434        let content = r#"Using one[^used].
435
436[^used]: This is used.
437[^orphan1]: Never used.
438[^orphan2]: Also never used."#;
439        let warnings = check_md066(content);
440        assert_eq!(warnings.len(), 2, "Should detect 2 orphaned definitions: {warnings:?}");
441        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
442        assert!(messages.iter().any(|m| m.contains("orphan1")));
443        assert!(messages.iter().any(|m| m.contains("orphan2")));
444    }
445
446    // ==================== Mixed cases ====================
447
448    #[test]
449    fn test_both_orphaned_reference_and_definition() {
450        let content = r#"Reference[^missing].
451
452[^unused]: Never referenced."#;
453        let warnings = check_md066(content);
454        assert_eq!(
455            warnings.len(),
456            2,
457            "Should detect both orphaned ref and def: {warnings:?}"
458        );
459        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
460        assert!(
461            messages.iter().any(|m| m.contains("missing")),
462            "Should find missing ref"
463        );
464        assert!(messages.iter().any(|m| m.contains("unused")), "Should find unused def");
465    }
466
467    // ==================== Code block handling ====================
468
469    #[test]
470    fn test_footnote_in_code_block_ignored() {
471        let content = r#"```
472[^1]: This is in a code block
473```
474
475Regular text without footnotes."#;
476        let warnings = check_md066(content);
477        assert!(warnings.is_empty(), "Footnotes in code blocks should be ignored");
478    }
479
480    #[test]
481    fn test_footnote_reference_in_code_span_ignored() {
482        // Note: This depends on whether pulldown-cmark parses footnotes inside code spans
483        // If it does, we should skip them
484        let content = r#"Use `[^1]` syntax for footnotes.
485
486[^1]: This definition exists but the reference in backticks shouldn't count."#;
487        // This is tricky - if pulldown-cmark doesn't parse [^1] in backticks as a footnote ref,
488        // then the definition is orphaned
489        let warnings = check_md066(content);
490        // Expectation depends on parser behavior - test the actual behavior
491        assert_eq!(
492            warnings.len(),
493            1,
494            "Code span reference shouldn't count, definition is orphaned"
495        );
496        assert!(warnings[0].message.contains("never referenced"));
497    }
498
499    // ==================== Frontmatter handling ====================
500
501    #[test]
502    fn test_footnote_in_frontmatter_ignored() {
503        let content = r#"---
504note: "[^1]: yaml value"
505---
506
507Regular content."#;
508        let warnings = check_md066(content);
509        assert!(
510            warnings.is_empty(),
511            "Footnotes in frontmatter should be ignored: {warnings:?}"
512        );
513    }
514
515    // ==================== Edge cases ====================
516
517    #[test]
518    fn test_empty_document() {
519        let warnings = check_md066("");
520        assert!(warnings.is_empty());
521    }
522
523    #[test]
524    fn test_footnote_with_special_characters() {
525        let content = r#"Reference[^my-note_1].
526
527[^my-note_1]: Definition with special chars in ID."#;
528        let warnings = check_md066(content);
529        assert!(
530            warnings.is_empty(),
531            "Special characters in footnote ID should work: {warnings:?}"
532        );
533    }
534
535    #[test]
536    fn test_multiline_footnote_definition() {
537        let content = r#"Reference[^long].
538
539[^long]: This is a long footnote
540    that spans multiple lines
541    with proper indentation."#;
542        let warnings = check_md066(content);
543        assert!(
544            warnings.is_empty(),
545            "Multiline footnote definitions should work: {warnings:?}"
546        );
547    }
548
549    #[test]
550    fn test_footnote_at_end_of_sentence() {
551        let content = r#"This ends with a footnote[^1].
552
553[^1]: End of sentence footnote."#;
554        let warnings = check_md066(content);
555        assert!(warnings.is_empty());
556    }
557
558    #[test]
559    fn test_footnote_mid_sentence() {
560        let content = r#"Some text[^1] continues here.
561
562[^1]: Mid-sentence footnote."#;
563        let warnings = check_md066(content);
564        assert!(warnings.is_empty());
565    }
566
567    #[test]
568    fn test_adjacent_footnotes() {
569        let content = r#"Text[^1][^2] with adjacent footnotes.
570
571[^1]: First.
572[^2]: Second."#;
573        let warnings = check_md066(content);
574        assert!(warnings.is_empty(), "Adjacent footnotes should work: {warnings:?}");
575    }
576
577    #[test]
578    fn test_footnote_only_definitions_no_references() {
579        let content = r#"[^1]: First orphan.
580[^2]: Second orphan.
581[^3]: Third orphan."#;
582        let warnings = check_md066(content);
583        assert_eq!(warnings.len(), 3, "All definitions should be flagged: {warnings:?}");
584    }
585
586    #[test]
587    fn test_footnote_only_references_no_definitions() {
588        let content = "Text[^1] and[^2] and[^3].";
589        let warnings = check_md066(content);
590        assert_eq!(warnings.len(), 3, "All references should be flagged: {warnings:?}");
591    }
592
593    // ==================== Blockquote handling ====================
594
595    #[test]
596    fn test_footnote_in_blockquote_valid() {
597        let content = r#"> This has a footnote[^1].
598>
599> [^1]: Definition inside blockquote."#;
600        let warnings = check_md066(content);
601        assert!(
602            warnings.is_empty(),
603            "Footnotes inside blockquotes should be validated: {warnings:?}"
604        );
605    }
606
607    #[test]
608    fn test_footnote_in_nested_blockquote() {
609        let content = r#"> > Nested blockquote with footnote[^nested].
610> >
611> > [^nested]: Definition in nested blockquote."#;
612        let warnings = check_md066(content);
613        assert!(
614            warnings.is_empty(),
615            "Footnotes in nested blockquotes should work: {warnings:?}"
616        );
617    }
618
619    #[test]
620    fn test_footnote_blockquote_orphaned_reference() {
621        let content = r#"> This has an orphaned footnote[^missing].
622>
623> No definition here."#;
624        let warnings = check_md066(content);
625        assert_eq!(warnings.len(), 1, "Should detect orphaned ref in blockquote");
626        assert!(warnings[0].message.contains("missing"));
627    }
628
629    #[test]
630    fn test_footnote_blockquote_orphaned_definition() {
631        let content = r#"> Some text.
632>
633> [^unused]: Never referenced in blockquote."#;
634        let warnings = check_md066(content);
635        assert_eq!(warnings.len(), 1, "Should detect orphaned def in blockquote");
636        assert!(warnings[0].message.contains("unused"));
637    }
638
639    // ==================== Duplicate definitions ====================
640
641    #[test]
642    fn test_duplicate_definition_detected() {
643        let content = r#"Reference[^1].
644
645[^1]: First definition.
646[^1]: Second definition (duplicate)."#;
647        let warnings = check_md066(content);
648        assert_eq!(warnings.len(), 1, "Should detect duplicate definition: {warnings:?}");
649        assert!(warnings[0].message.contains("Duplicate"));
650        assert!(warnings[0].message.contains("[^1]"));
651    }
652
653    #[test]
654    fn test_multiple_duplicate_definitions() {
655        let content = r#"Reference[^dup].
656
657[^dup]: First.
658[^dup]: Second.
659[^dup]: Third."#;
660        let warnings = check_md066(content);
661        assert_eq!(warnings.len(), 2, "Should detect 2 duplicate definitions: {warnings:?}");
662        assert!(warnings.iter().all(|w| w.message.contains("Duplicate")));
663    }
664
665    #[test]
666    fn test_duplicate_definition_case_insensitive() {
667        let content = r#"Reference[^Note].
668
669[^note]: Lowercase definition.
670[^NOTE]: Uppercase definition (duplicate)."#;
671        let warnings = check_md066(content);
672        assert_eq!(warnings.len(), 1, "Case-insensitive duplicate detection: {warnings:?}");
673        assert!(warnings[0].message.contains("Duplicate"));
674    }
675
676    // ==================== HTML comment handling ====================
677
678    #[test]
679    fn test_footnote_reference_in_html_comment_ignored() {
680        let content = r#"<!-- This has [^1] in a comment -->
681
682Regular text without footnotes."#;
683        let warnings = check_md066(content);
684        assert!(
685            warnings.is_empty(),
686            "Footnote refs in HTML comments should be ignored: {warnings:?}"
687        );
688    }
689
690    #[test]
691    fn test_footnote_definition_in_html_comment_ignored() {
692        let content = r#"<!--
693[^1]: Definition in HTML comment
694-->
695
696Regular text."#;
697        let warnings = check_md066(content);
698        assert!(
699            warnings.is_empty(),
700            "Footnote defs in HTML comments should be ignored: {warnings:?}"
701        );
702    }
703
704    #[test]
705    fn test_footnote_outside_html_comment_still_validated() {
706        let content = r#"<!-- Just a comment -->
707
708Text with footnote[^1].
709
710[^1]: Valid definition outside comment."#;
711        let warnings = check_md066(content);
712        assert!(warnings.is_empty(), "Valid footnote outside comment: {warnings:?}");
713    }
714
715    #[test]
716    fn test_orphaned_ref_not_saved_by_def_in_comment() {
717        let content = r#"Text with orphaned[^missing].
718
719<!--
720[^missing]: This definition is in a comment, shouldn't count
721-->"#;
722        let warnings = check_md066(content);
723        assert_eq!(warnings.len(), 1, "Def in comment shouldn't satisfy ref: {warnings:?}");
724        assert!(warnings[0].message.contains("no corresponding definition"));
725    }
726
727    // ==================== HTML block handling ====================
728
729    #[test]
730    fn test_footnote_in_html_block_ignored() {
731        // Regex character classes like [^abc] should be ignored in HTML blocks
732        let content = r#"<table>
733<tr>
734<td><code>[^abc]</code></td>
735<td>Negated character class</td>
736</tr>
737</table>
738
739Regular markdown text."#;
740        let warnings = check_md066(content);
741        assert!(
742            warnings.is_empty(),
743            "Footnote-like patterns in HTML blocks should be ignored: {warnings:?}"
744        );
745    }
746
747    #[test]
748    fn test_footnote_in_html_table_ignored() {
749        let content = r#"| Header |
750|--------|
751| Cell   |
752
753<div>
754<p>This has <code>[^0-9]</code> regex pattern</p>
755</div>
756
757Normal text."#;
758        let warnings = check_md066(content);
759        assert!(
760            warnings.is_empty(),
761            "Regex patterns in HTML div should be ignored: {warnings:?}"
762        );
763    }
764
765    #[test]
766    fn test_real_footnote_outside_html_block() {
767        let content = r#"<div>
768Some HTML content
769</div>
770
771Text with real footnote[^1].
772
773[^1]: This is a real footnote definition."#;
774        let warnings = check_md066(content);
775        assert!(
776            warnings.is_empty(),
777            "Real footnote outside HTML block should work: {warnings:?}"
778        );
779    }
780
781    // ==================== Combined edge cases ====================
782
783    #[test]
784    fn test_blockquote_with_duplicate_definitions() {
785        let content = r#"> Text[^1].
786>
787> [^1]: First.
788> [^1]: Duplicate in blockquote."#;
789        let warnings = check_md066(content);
790        assert_eq!(warnings.len(), 1, "Should detect duplicate in blockquote: {warnings:?}");
791        assert!(warnings[0].message.contains("Duplicate"));
792    }
793
794    #[test]
795    fn test_all_enhancement_features_together() {
796        let content = r#"<!-- Comment with [^comment] -->
797
798Regular text[^valid] and[^missing].
799
800> Blockquote text[^bq].
801>
802> [^bq]: Blockquote definition.
803
804[^valid]: Valid definition.
805[^valid]: Duplicate definition.
806[^unused]: Never referenced."#;
807        let warnings = check_md066(content);
808        // Should find:
809        // 1. [^missing] - orphaned reference
810        // 2. [^valid] duplicate definition
811        // 3. [^unused] - orphaned definition
812        assert_eq!(warnings.len(), 3, "Should find all issues: {warnings:?}");
813
814        let messages: Vec<&str> = warnings.iter().map(|w| w.message.as_str()).collect();
815        assert!(
816            messages.iter().any(|m| m.contains("missing")),
817            "Should find orphaned ref"
818        );
819        assert!(
820            messages.iter().any(|m| m.contains("Duplicate")),
821            "Should find duplicate"
822        );
823        assert!(
824            messages.iter().any(|m| m.contains("unused")),
825            "Should find orphaned def"
826        );
827    }
828
829    #[test]
830    fn test_footnote_ref_at_end_of_file_no_newline() {
831        let content = "[^1]: Definition here.\n\nText with[^1]";
832        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
833        let rule = MD066FootnoteValidation;
834        let result = rule.check(&ctx).unwrap();
835        assert!(
836            result.is_empty(),
837            "Valid footnote pair without trailing newline should not warn: {result:?}"
838        );
839    }
840
841    #[test]
842    fn test_orphaned_footnote_ref_at_eof_no_newline() {
843        let content = "Text with[^missing]";
844        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
845        let rule = MD066FootnoteValidation;
846        let result = rule.check(&ctx).unwrap();
847        assert!(
848            !result.is_empty(),
849            "Orphaned ref at EOF without newline should warn: {result:?}"
850        );
851    }
852
853    #[test]
854    fn test_midline_footnote_ref_with_colon_detected_as_reference() {
855        // [^note]: mid-line is a reference followed by colon, NOT a definition
856        let content = "# Test\n\nI think [^note]: this is relevant.\n";
857        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
858        let rule = MD066FootnoteValidation;
859        let result = rule.check(&ctx).unwrap();
860        assert_eq!(
861            result.len(),
862            1,
863            "Mid-line [^note]: should be detected as undefined reference: {result:?}"
864        );
865        assert!(
866            result[0].message.contains("no corresponding definition"),
867            "Should warn about missing definition: {}",
868            result[0].message
869        );
870    }
871
872    #[test]
873    fn test_midline_footnote_ref_with_colon_matched_to_definition() {
874        // [^note]: mid-line is a reference; [^note]: at line start is the definition
875        let content = "# Test\n\nI think [^note]: this is relevant.\n\n[^note]: The actual definition.\n";
876        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
877        let rule = MD066FootnoteValidation;
878        let result = rule.check(&ctx).unwrap();
879        assert!(
880            result.is_empty(),
881            "Mid-line ref should match line-start definition: {result:?}"
882        );
883    }
884
885    #[test]
886    fn test_linestart_footnote_def_still_skipped_as_reference() {
887        // [^note]: at line start IS a definition and should NOT be counted as reference
888        let content = "# Test\n\n[^note]: The definition.\n";
889        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
890        let rule = MD066FootnoteValidation;
891        let result = rule.check(&ctx).unwrap();
892        // Should warn about orphaned definition (no reference)
893        assert_eq!(result.len(), 1, "Orphaned def should be flagged: {result:?}");
894        assert!(
895            result[0].message.contains("never referenced"),
896            "Should say 'never referenced': {}",
897            result[0].message
898        );
899    }
900
901    #[test]
902    fn test_indented_footnote_def_still_skipped() {
903        // [^note]: with 1-3 spaces indent is still a definition
904        let content = "# Test\n\n   [^note]: Indented definition.\n";
905        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
906        let rule = MD066FootnoteValidation;
907        let result = rule.check(&ctx).unwrap();
908        // Should be treated as an orphaned definition (no reference)
909        assert_eq!(result.len(), 1, "Indented def should still be detected: {result:?}");
910        assert!(
911            result[0].message.contains("never referenced"),
912            "Should say 'never referenced': {}",
913            result[0].message
914        );
915    }
916
917    #[test]
918    fn test_multiple_midline_refs_with_colons_on_same_line() {
919        // Both [^a]: and [^b]: mid-line should be counted as references
920        let content = "# Test\n\nText [^a]: and [^b]: more text.\n\n[^a]: Def A.\n[^b]: Def B.\n";
921        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
922        let rule = MD066FootnoteValidation;
923        let result = rule.check(&ctx).unwrap();
924        assert!(
925            result.is_empty(),
926            "Both mid-line refs should match their definitions: {result:?}"
927        );
928    }
929
930    #[test]
931    fn test_blockquote_footnote_def_still_skipped() {
932        // > [^note]: inside blockquote is a definition, not a reference
933        let content = "# Test\n\n> [^note]: Definition in blockquote.\n";
934        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
935        let rule = MD066FootnoteValidation;
936        let result = rule.check(&ctx).unwrap();
937        // Orphaned definition (no reference uses it)
938        assert_eq!(
939            result.len(),
940            1,
941            "Blockquote def should be detected as orphaned: {result:?}"
942        );
943        assert!(
944            result[0].message.contains("never referenced"),
945            "Should say 'never referenced': {}",
946            result[0].message
947        );
948    }
949
950    #[test]
951    fn test_list_item_footnote_ref_with_colon_is_reference() {
952        // - [^note]: inside a list item is a reference, not a definition
953        let content = "# Test\n\n- [^note]: list item text.\n\n[^note]: The actual definition.\n";
954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
955        let rule = MD066FootnoteValidation;
956        let result = rule.check(&ctx).unwrap();
957        assert!(
958            result.is_empty(),
959            "List item [^note]: should be a ref matching the definition: {result:?}"
960        );
961    }
962
963    // ==================== Warning position tests ====================
964
965    #[test]
966    fn test_orphaned_reference_column_position() {
967        // "This references[^missing] a non-existent footnote."
968        //  column 16:     ^
969        let content = "This references[^missing] a non-existent footnote.";
970        let warnings = check_md066(content);
971        assert_eq!(warnings.len(), 1);
972        assert_eq!(warnings[0].line, 1);
973        assert_eq!(warnings[0].column, 16, "Column should point to '[^missing]'");
974        // "[^missing]" is 10 chars, so end_column = 16 + 10 = 26
975        assert_eq!(warnings[0].end_column, 26);
976    }
977
978    #[test]
979    fn test_orphaned_definition_column_position() {
980        // "[^unused]: Never referenced." starts at column 1
981        let content = "Regular text.\n\n[^unused]: Never referenced.";
982        let warnings = check_md066(content);
983        assert_eq!(warnings.len(), 1);
984        assert_eq!(warnings[0].line, 3);
985        assert_eq!(warnings[0].column, 1, "Definition at start of line");
986        // "[^unused]:" is 10 chars
987        assert_eq!(warnings[0].end_column, 11);
988    }
989
990    #[test]
991    fn test_duplicate_definition_column_position() {
992        let content = "Reference[^1].\n\n[^1]: First.\n[^1]: Second.";
993        let warnings = check_md066(content);
994        assert_eq!(warnings.len(), 1);
995        assert_eq!(warnings[0].line, 4);
996        assert_eq!(warnings[0].column, 1);
997        // "[^1]:" is 5 chars
998        assert_eq!(warnings[0].end_column, 6);
999    }
1000
1001    #[test]
1002    fn test_orphaned_definition_in_blockquote_column() {
1003        // "> [^unused]: Never referenced."
1004        //    ^ column 3 (after "> ")
1005        let content = "> Some text.\n>\n> [^unused]: Never referenced.";
1006        let warnings = check_md066(content);
1007        assert_eq!(warnings.len(), 1);
1008        assert_eq!(warnings[0].line, 3);
1009        assert_eq!(warnings[0].column, 3, "Should point past blockquote prefix");
1010    }
1011
1012    #[test]
1013    fn test_orphaned_reference_after_multibyte_chars() {
1014        // "日本語テキスト[^ref1] has no def."
1015        // "日本語テキスト" = 7 characters (each is 3 bytes in UTF-8)
1016        // Column should be 8 (character-based), not 22 (byte-based)
1017        let content = "日本語テキスト[^ref1] has no def.";
1018        let warnings = check_md066(content);
1019        assert_eq!(warnings.len(), 1);
1020        assert_eq!(
1021            warnings[0].column, 8,
1022            "Column should be character-based, not byte-based"
1023        );
1024        // "[^ref1]" = 7 chars
1025        assert_eq!(warnings[0].end_column, 15);
1026    }
1027
1028    #[test]
1029    fn test_orphaned_definition_with_indentation_column() {
1030        // "   [^note]:" — column should point to [^note]:, not the leading spaces
1031        let content = "# Heading\n\n   [^note]: Indented and orphaned.";
1032        let warnings = check_md066(content);
1033        assert_eq!(warnings.len(), 1);
1034        // "[^note]:" starts at column 4 (after 3 spaces)
1035        assert_eq!(warnings[0].column, 4);
1036        // "[^note]:" is 8 chars, end_column = 4 + 8 = 12
1037        assert_eq!(warnings[0].end_column, 12);
1038    }
1039
1040    #[test]
1041    fn test_orphaned_ref_end_column_uses_original_case() {
1042        // ref_id is stored lowercased, but end_column should reflect the actual source text
1043        let content = "Text with [^NOTE] here.";
1044        let warnings = check_md066(content);
1045        assert_eq!(warnings.len(), 1);
1046        // "Text with " = 10 chars, so [^NOTE] starts at column 11
1047        assert_eq!(warnings[0].column, 11);
1048        // "[^NOTE]" = 7 chars, end_column = 11 + 7 = 18
1049        assert_eq!(warnings[0].end_column, 18);
1050    }
1051}